Skip to content

Commit 3388f3c

Browse files
Split SyntaxError::UnclosedPIOrXmlDecl into separate enum Variants
1 parent 646ce4b commit 3388f3c

File tree

8 files changed

+60
-27
lines changed

8 files changed

+60
-27
lines changed

src/errors.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ pub enum SyntaxError {
1717
/// The parser started to parse `<!`, but the input ended before it can recognize
1818
/// anything.
1919
InvalidBangMarkup,
20-
/// The parser started to parse processing instruction or XML declaration (`<?`),
20+
/// The parser started to parse a processing instruction (`<?`),
2121
/// but the input ended before the `?>` sequence was found.
22-
UnclosedPIOrXmlDecl,
22+
UnclosedPI,
23+
/// The parser started to parse an XML declaration (`<?xml`),
24+
/// but the input ended before the `?>` sequence was found.
25+
UnclosedXmlDecl,
2326
/// The parser started to parse comment (`<!--`) content, but the input ended
2427
/// before the `-->` sequence was found.
2528
UnclosedComment,
@@ -38,8 +41,11 @@ impl fmt::Display for SyntaxError {
3841
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
3942
match self {
4043
Self::InvalidBangMarkup => f.write_str("unknown or missed symbol in markup"),
41-
Self::UnclosedPIOrXmlDecl => {
42-
f.write_str("processing instruction or xml declaration not closed: `?>` not found before end of input")
44+
Self::UnclosedPI => {
45+
f.write_str("processing instruction not closed: `?>` not found before end of input")
46+
}
47+
Self::UnclosedXmlDecl => {
48+
f.write_str("XML declaration not closed: `?>` not found before end of input")
4349
}
4450
Self::UnclosedComment => {
4551
f.write_str("comment not closed: `-->` not found before end of input")

src/parser/element.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ impl Parser for ElementParser {
7373
}
7474

7575
#[inline]
76-
fn eof_error() -> SyntaxError {
76+
fn eof_error(&self, _content: &[u8]) -> SyntaxError {
7777
SyntaxError::UnclosedTag
7878
}
7979
}

src/parser/mod.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,9 @@ pub trait Parser {
2525

2626
/// Returns parse error produced by this parser in case of reaching end of
2727
/// input without finding the end of a parsed thing.
28-
fn eof_error() -> SyntaxError;
28+
///
29+
/// # Parameters
30+
/// - `content`: the content that was read before EOF. Some parsers may use
31+
/// this to provide more specific error messages.
32+
fn eof_error(&self, content: &[u8]) -> SyntaxError;
2933
}

src/parser/pi.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
use crate::errors::SyntaxError;
44
use crate::parser::Parser;
5+
use crate::utils::is_whitespace;
56

67
/// A parser that searches for a `?>` sequence in the slice.
78
///
@@ -72,8 +73,18 @@ impl Parser for PiParser {
7273
}
7374

7475
#[inline]
75-
fn eof_error() -> SyntaxError {
76-
SyntaxError::UnclosedPIOrXmlDecl
76+
fn eof_error(&self, content: &[u8]) -> SyntaxError {
77+
// Check if content starts with "?xml" followed by whitespace, '?' or end.
78+
// This determines whether to report an unclosed XML declaration or PI.
79+
let is_xml_decl = content.starts_with(b"?xml")
80+
&& content
81+
.get(4)
82+
.map_or(true, |&b| is_whitespace(b) || b == b'?');
83+
if is_xml_decl {
84+
SyntaxError::UnclosedXmlDecl
85+
} else {
86+
SyntaxError::UnclosedPI
87+
}
7788
}
7889
}
7990

src/reader/buffered_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ macro_rules! impl_buffered_source {
234234
}
235235

236236
*position += read;
237-
Err(Error::Syntax(P::eof_error()))
237+
Err(Error::Syntax(parser.eof_error(&buf[start..])))
238238
}
239239

240240
#[inline]

src/reader/slice_reader.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ impl<'a> XmlSource<'a, ()> for &'a [u8] {
350350
}
351351

352352
*position += self.len() as u64;
353-
Err(Error::Syntax(P::eof_error()))
353+
Err(Error::Syntax(parser.eof_error(self)))
354354
}
355355

356356
#[inline]

src/reader/state.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -270,11 +270,19 @@ impl ReaderState {
270270
)))
271271
}
272272
} else {
273-
// <?....EOF
274-
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
273+
// <?....>
274+
// ^^^^^ - `buf` does not contain `<`, but we want to report error at `<`,
275275
// so we move offset to it (-2 for `<` and `>`)
276276
self.last_error_offset = self.offset - len as u64 - 2;
277-
Err(Error::Syntax(SyntaxError::UnclosedPIOrXmlDecl))
277+
278+
// Check if this is an XML declaration (starts with "?xml" followed by whitespace, "?" or the end of the buffer)
279+
let is_xml_decl = buf.starts_with(b"?xml")
280+
&& buf.get(4).map_or(true, |&b| is_whitespace(b) || b == b'?');
281+
if is_xml_decl {
282+
Err(Error::Syntax(SyntaxError::UnclosedXmlDecl))
283+
} else {
284+
Err(Error::Syntax(SyntaxError::UnclosedPI))
285+
}
278286
}
279287
}
280288

tests/reader-errors.rs

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -430,16 +430,16 @@ mod syntax {
430430
mod pi {
431431
use super::*;
432432

433-
err!(unclosed01(".<?") => SyntaxError::UnclosedPIOrXmlDecl);
434-
err!(unclosed02(".<??") => SyntaxError::UnclosedPIOrXmlDecl);
435-
err!(unclosed03(".<?>") => SyntaxError::UnclosedPIOrXmlDecl);
436-
err!(unclosed04(".<?<") => SyntaxError::UnclosedPIOrXmlDecl);
437-
err!(unclosed05(".<?&") => SyntaxError::UnclosedPIOrXmlDecl);
438-
err!(unclosed06(".<?p") => SyntaxError::UnclosedPIOrXmlDecl);
439-
err!(unclosed07(".<? ") => SyntaxError::UnclosedPIOrXmlDecl);
440-
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPIOrXmlDecl);
441-
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPIOrXmlDecl);
442-
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPIOrXmlDecl);
433+
err!(unclosed01(".<?") => SyntaxError::UnclosedPI);
434+
err!(unclosed02(".<??") => SyntaxError::UnclosedPI);
435+
err!(unclosed03(".<?>") => SyntaxError::UnclosedPI);
436+
err!(unclosed04(".<?<") => SyntaxError::UnclosedPI);
437+
err!(unclosed05(".<?&") => SyntaxError::UnclosedPI);
438+
err!(unclosed06(".<?p") => SyntaxError::UnclosedPI);
439+
err!(unclosed07(".<? ") => SyntaxError::UnclosedPI);
440+
err!(unclosed08(".<?\t") => SyntaxError::UnclosedPI);
441+
err!(unclosed09(".<?\r") => SyntaxError::UnclosedPI);
442+
err!(unclosed10(".<?\n") => SyntaxError::UnclosedPI);
443443

444444
// According to the grammar, processing instruction MUST contain a non-empty
445445
// target name, but we do not consider this as a _syntax_ error.
@@ -453,10 +453,14 @@ mod syntax {
453453
mod decl {
454454
use super::*;
455455

456-
err!(unclosed1(".<?x") => SyntaxError::UnclosedPIOrXmlDecl);
457-
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPIOrXmlDecl);
458-
err!(unclosed3(".<?xml") => SyntaxError::UnclosedPIOrXmlDecl);
459-
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedPIOrXmlDecl);
456+
err!(unclosed1(".<?x") => SyntaxError::UnclosedPI);
457+
err!(unclosed2(".<?xm") => SyntaxError::UnclosedPI);
458+
err!(unclosed3(".<?xml") => SyntaxError::UnclosedXmlDecl);
459+
err!(unclosed4(".<?xml?") => SyntaxError::UnclosedXmlDecl);
460+
err!(unclosed5(".<?xml ") => SyntaxError::UnclosedXmlDecl);
461+
err!(unclosed6(".<?xml\t") => SyntaxError::UnclosedXmlDecl);
462+
err!(unclosed7(".<?xml\r") => SyntaxError::UnclosedXmlDecl);
463+
err!(unclosed8(".<?xml\n") => SyntaxError::UnclosedXmlDecl);
460464

461465
// According to the grammar, XML declaration MUST contain at least one space
462466
// and `version` attribute, but we do not consider this as a _syntax_ error.

0 commit comments

Comments
 (0)