From a8e8a096a7867ce70b8a3c83f3d3d6b39dfd344e Mon Sep 17 00:00:00 2001 From: James Williams Date: Sat, 30 Aug 2025 11:52:33 -0700 Subject: [PATCH 1/4] Demonstrate whitespace bug. --- src/de/mod.rs | 10 ++++++++++ tests/serde-de.rs | 24 ++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 7671985b..35a83863 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -4641,6 +4641,16 @@ mod tests { assert_eq!(de.next().unwrap(), DeEvent::Eof); } + #[test] + fn space() { + let mut de = make_de(" "); + assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag"))); + assert_eq!(de.next().unwrap(), DeEvent::Text(" ".into())); + assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); + assert_eq!(de.next().unwrap(), DeEvent::Eof); + // Passes as expected + } + // start::text::text has no difference from start::text #[test] diff --git a/tests/serde-de.rs b/tests/serde-de.rs index cc82d5f6..04f169d4 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -45,12 +45,32 @@ mod text { content: String, } - let item: Item = from_str(r#"content"#).unwrap(); + let item: Item = from_str(r#"content "#).unwrap(); + // Passes as expected assert_eq!( item, Item { - content: "content".into() + content: "content ".into() + } + ); + } + + #[test] + fn explicit_space() { + #[derive(Debug, Deserialize, PartialEq)] + struct Item { + #[serde(rename = "$text")] + content: String, + } + + let item: Item = from_str(r#" "#).unwrap(); + + // Fails: called `Result::unwrap()` on an `Err` value: Custom("missing field `$text`") + assert_eq!( + item, + Item { + content: " ".into() } ); } From e2e0cad0075dbd2cb9ba20245beb896d688d9028 Mon Sep 17 00:00:00 2001 From: James Williams Date: Sun, 31 Aug 2025 07:11:13 -0700 Subject: [PATCH 2/4] Skip only whitespace with a line break. --- src/de/mod.rs | 11 +++++++++-- tests/serde-de.rs | 2 -- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 35a83863..953d8ce4 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2789,8 +2789,16 @@ where fn skip_whitespaces(&mut self) -> Result<(), DeError> { loop { match self.peek()? { + // Skip only blank text nodes that contain a newline or carriage return + // (typical pretty-printed formatting). Preserve other blank text + // (e.g. single space) as they may be significant for some deserialization scenarios. DeEvent::Text(e) if e.is_blank() => { - self.next()?; + let contains_newline = e.text.chars().any(|c| c == '\n' || c == '\r'); + if contains_newline { + self.next()?; + continue; + } + break; } _ => break, } @@ -4648,7 +4656,6 @@ mod tests { assert_eq!(de.next().unwrap(), DeEvent::Text(" ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); - // Passes as expected } // start::text::text has no difference from start::text diff --git a/tests/serde-de.rs b/tests/serde-de.rs index 04f169d4..da171401 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -47,7 +47,6 @@ mod text { let item: Item = from_str(r#"content "#).unwrap(); - // Passes as expected assert_eq!( item, Item { @@ -66,7 +65,6 @@ mod text { let item: Item = from_str(r#" "#).unwrap(); - // Fails: called `Result::unwrap()` on an `Err` value: Custom("missing field `$text`") assert_eq!( item, Item { From 6a28e86948a77261cb62fc2c9b30a9aee370f0ad Mon Sep 17 00:00:00 2001 From: James Williams Date: Sun, 31 Aug 2025 12:58:33 -0700 Subject: [PATCH 3/4] Revert "Skip only whitespace with a line break." This reverts commit e2e0cad0075dbd2cb9ba20245beb896d688d9028. --- src/de/mod.rs | 11 ++--------- tests/serde-de.rs | 2 ++ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 953d8ce4..35a83863 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2789,16 +2789,8 @@ where fn skip_whitespaces(&mut self) -> Result<(), DeError> { loop { match self.peek()? { - // Skip only blank text nodes that contain a newline or carriage return - // (typical pretty-printed formatting). Preserve other blank text - // (e.g. single space) as they may be significant for some deserialization scenarios. DeEvent::Text(e) if e.is_blank() => { - let contains_newline = e.text.chars().any(|c| c == '\n' || c == '\r'); - if contains_newline { - self.next()?; - continue; - } - break; + self.next()?; } _ => break, } @@ -4656,6 +4648,7 @@ mod tests { assert_eq!(de.next().unwrap(), DeEvent::Text(" ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); + // Passes as expected } // start::text::text has no difference from start::text diff --git a/tests/serde-de.rs b/tests/serde-de.rs index da171401..04f169d4 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -47,6 +47,7 @@ mod text { let item: Item = from_str(r#"content "#).unwrap(); + // Passes as expected assert_eq!( item, Item { @@ -65,6 +66,7 @@ mod text { let item: Item = from_str(r#" "#).unwrap(); + // Fails: called `Result::unwrap()` on an `Err` value: Custom("missing field `$text`") assert_eq!( item, Item { From 399d550536f6c74ec5e824b85cb44706a4fa0837 Mon Sep 17 00:00:00 2001 From: James Williams Date: Sun, 31 Aug 2025 13:08:21 -0700 Subject: [PATCH 4/4] Skip whitespace for nodes containing a text event. --- src/de/map.rs | 9 ++++++++- src/de/mod.rs | 1 - tests/serde-de-seq.rs | 36 ++++-------------------------------- tests/serde-de.rs | 2 -- 4 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/de/map.rs b/src/de/map.rs index 2c1e8953..25103000 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -273,7 +273,14 @@ where QNameDeserializer::from_attr(QName(&slice[key]), decoder, &mut self.de.key_buf)?; seed.deserialize(de).map(Some) } else { - self.skip_whitespaces()?; + // If we have dedicated "$text" field, whitespace-only text may be significant. + // That also means, that type with `$text` fields behaves as if its element has + // `xml:space="preserve"` attribute: you must not have pretty-print indents + // inside this element. + if !self.has_text_field { + self.skip_whitespaces()?; + } + // try getting from events (value) match self.de.peek()? { // If we have dedicated "$text" field, it will not be passed to "$value" field diff --git a/src/de/mod.rs b/src/de/mod.rs index 35a83863..99dd63fa 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -4648,7 +4648,6 @@ mod tests { assert_eq!(de.next().unwrap(), DeEvent::Text(" ".into())); assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag"))); assert_eq!(de.next().unwrap(), DeEvent::Eof); - // Passes as expected } // start::text::text has no difference from start::text diff --git a/tests/serde-de-seq.rs b/tests/serde-de-seq.rs index d8287ba6..fa63f23c 100644 --- a/tests/serde-de-seq.rs +++ b/tests/serde-de-seq.rs @@ -831,14 +831,7 @@ mod fixed_name { } from_str::( - r#" - - - - text - - - "#, + r#"text"#, ) .unwrap(); } @@ -1688,14 +1681,7 @@ mod fixed_name { } let data: List = from_str( - r#" - - - - text - - - "#, + r#"text"#, ) .unwrap(); @@ -2946,14 +2932,7 @@ mod variable_name { } from_str::( - r#" - - - - text - - - "#, + r#"text"#, ) .unwrap(); } @@ -4015,14 +3994,7 @@ mod variable_name { } let data: List = from_str( - r#" - - - - text - - - "#, + r#"text"#, ) .unwrap(); diff --git a/tests/serde-de.rs b/tests/serde-de.rs index 04f169d4..da171401 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -47,7 +47,6 @@ mod text { let item: Item = from_str(r#"content "#).unwrap(); - // Passes as expected assert_eq!( item, Item { @@ -66,7 +65,6 @@ mod text { let item: Item = from_str(r#" "#).unwrap(); - // Fails: called `Result::unwrap()` on an `Err` value: Custom("missing field `$text`") assert_eq!( item, Item {