11//! Manage xml character escapes
22
3- use memchr;
43use std:: borrow:: Cow ;
54use std:: collections:: HashMap ;
65use std:: ops:: Range ;
76
7+ use jetscii:: bytes;
8+ use memchr;
9+ use once_cell:: sync:: Lazy ;
10+
811#[ cfg( test) ]
912use pretty_assertions:: assert_eq;
1013
// NOTE(review): in the call sites visible in this change, `escape` passes
// `XML_PARTIAL_ESCAPE_BYTES` and `partial_escape` passes `XML_ESCAPE_BYTES`.
// Given the names, that looks swapped -- confirm against the intended behavior.

/// Byte matcher for the full escape set: `<`, `>`, `&`, `'` and `"`.
/// Wrapped in `Lazy`, so the matcher is constructed on first access.
14+ static XML_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > =
15+ Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' , b'\'' , b'"' ) ) ;
/// Byte matcher for the reduced escape set: `<`, `>` and `&` only (quotes are not matched).
/// Wrapped in `Lazy`, so the matcher is constructed on first access.
16+ static XML_PARTIAL_ESCAPE_BYTES : Lazy < jetscii:: BytesConst > = Lazy :: new ( || bytes ! ( b'<' , b'>' , b'&' ) ) ;
17+
1118/// Error for XML escape/unescape.
1219#[ derive( Debug ) ]
1320pub enum EscapeError {
@@ -73,8 +80,8 @@ pub fn escape(raw: &[u8]) -> Cow<[u8]> {
7380 _ => false ,
7481 }
7582 }
76-
77- _escape ( raw, to_escape )
83+ // _escape(raw, to_escape)
84+ simd_escape ( raw, & XML_PARTIAL_ESCAPE_BYTES )
7885}
7986
8087/// Should only be used for escaping text content. In xml text content, it is allowed
@@ -89,8 +96,8 @@ pub fn partial_escape(raw: &[u8]) -> Cow<[u8]> {
8996 _ => false ,
9097 }
9198 }
92-
93- _escape ( raw, to_escape )
99+ // _escape(raw, to_escape)
100+ simd_escape ( raw, & XML_ESCAPE_BYTES )
94101}
95102
96103/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
@@ -127,6 +134,42 @@ fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
127134 }
128135}
129136
137+ /// Escapes a `&[u8]` and replaces all xml special characters (<, >, &, ', ") with their
138+ /// corresponding xml escaped value.
139+ pub fn simd_escape < ' a > ( raw : & ' a [ u8 ] , escape_matcher : & jetscii:: BytesConst ) -> Cow < ' a , [ u8 ] > {
140+ let mut escaped = None ;
141+ let mut pos = 0 ;
142+ while let Some ( i) = escape_matcher. find ( & raw [ pos..] ) {
143+ if escaped. is_none ( ) {
144+ escaped = Some ( Vec :: with_capacity ( raw. len ( ) ) ) ;
145+ }
146+ let escaped = escaped. as_mut ( ) . expect ( "initialized" ) ;
147+ let new_pos = pos + i;
148+ escaped. extend_from_slice ( & raw [ pos..new_pos] ) ;
149+ match raw[ new_pos] {
150+ b'<' => escaped. extend_from_slice ( b"<" ) ,
151+ b'>' => escaped. extend_from_slice ( b">" ) ,
152+ b'\'' => escaped. extend_from_slice ( b"'" ) ,
153+ b'&' => escaped. extend_from_slice ( b"&" ) ,
154+ b'"' => escaped. extend_from_slice ( b""" ) ,
155+ c @ _ => unreachable ! (
156+ "Found {} but only '<', '>', ', '&' and '\" ' are escaped" ,
157+ c as char
158+ ) ,
159+ }
160+ pos = new_pos + 1 ;
161+ }
162+
163+ if let Some ( mut escaped) = escaped {
164+ if let Some ( raw) = raw. get ( pos..) {
165+ escaped. extend_from_slice ( raw) ;
166+ }
167+ Cow :: Owned ( escaped)
168+ } else {
169+ Cow :: Borrowed ( raw)
170+ }
171+ }
172+
130173/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
131174/// value
132175pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
0 commit comments