22
33use memchr;
44use std:: borrow:: Cow ;
5+ use std:: collections:: HashMap ;
56
67#[ derive( Debug ) ]
78pub enum EscapeError {
@@ -107,6 +108,32 @@ pub fn escape(raw: &[u8]) -> Cow<[u8]> {
107108/// Unescapes a `&[u8]`, replacing all XML escaped characters ('&...;') with their corresponding
108109/// value. No custom entities are supplied: this delegates to `do_unescape` with `None`.
109110pub fn unescape ( raw : & [ u8 ] ) -> Result < Cow < [ u8 ] > , EscapeError > {
111+ do_unescape ( raw, None )
112+ }
113+
114+ /// Unescapes a `&[u8]`, replacing all XML escaped characters ('&...;') with their corresponding
115+ /// value, using a dictionary of custom entities (delegates to `do_unescape` with `Some`).
116+ ///
117+ /// # Pre-condition
118+ ///
119+ /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
120+ pub fn unescape_with < ' a > (
121+ raw : & ' a [ u8 ] ,
122+ custom_entities : & HashMap < Vec < u8 > , Vec < u8 > > ,
123+ ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
124+ do_unescape ( raw, Some ( custom_entities) )
125+ }
126+
127+ /// Unescapes a `&[u8]`, replacing all XML escaped characters ('&...;') with their corresponding
128+ /// value, using an optional dictionary of custom entities.
129+ ///
130+ /// # Pre-condition
131+ ///
132+ /// The keys and values of `custom_entities`, if any, must be valid UTF-8.
133+ pub fn do_unescape < ' a > (
134+ raw : & ' a [ u8 ] ,
135+ custom_entities : Option < & HashMap < Vec < u8 > , Vec < u8 > > > ,
136+ ) -> Result < Cow < ' a , [ u8 ] > , EscapeError > {
110137 let mut unescaped = None ;
111138 let mut last_end = 0 ;
112139 let mut iter = memchr:: memchr2_iter ( b'&' , b';' , raw) ;
@@ -128,22 +155,27 @@ pub fn unescape(raw: &[u8]) -> Result<Cow<[u8]>, EscapeError> {
128155 b"amp" => unescaped. push ( b'&' ) ,
129156 b"apos" => unescaped. push ( b'\'' ) ,
130157 b"quot" => unescaped. push ( b'\"' ) ,
131- bytes => {
132- let code = if bytes. starts_with ( b"#x" ) {
133- parse_hexadecimal ( & bytes[ 2 ..] )
134- } else if bytes. starts_with ( b"#" ) {
135- parse_decimal ( & bytes[ 1 ..] )
158+ bytes if bytes. starts_with ( b"#" ) => {
159+ let bytes = & bytes[ 1 ..] ;
160+ let code = if bytes. starts_with ( b"x" ) {
161+ parse_hexadecimal ( & bytes[ 1 ..] )
136162 } else {
137- Err ( EscapeError :: UnrecognizedSymbol (
138- start + 1 ..end,
139- String :: from_utf8 ( bytes. to_vec ( ) ) ,
140- ) )
163+ parse_decimal ( & bytes)
141164 } ?;
142165 if code == 0 {
143166 return Err ( EscapeError :: EntityWithNull ( start..end) ) ;
144167 }
145168 push_utf8 ( unescaped, code) ;
146169 }
170+ bytes => match custom_entities. and_then ( |hm| hm. get ( bytes) ) {
171+ Some ( value) => unescaped. extend_from_slice ( & value) ,
172+ None => {
173+ return Err ( EscapeError :: UnrecognizedSymbol (
174+ start + 1 ..end,
175+ String :: from_utf8 ( bytes. to_vec ( ) ) ,
176+ ) )
177+ }
178+ } ,
147179 }
148180
149181 #[ cfg( feature = "escape-html" ) ]
@@ -5532,22 +5564,27 @@ pub fn unescape(raw: &[u8]) -> Result<Cow<[u8]>, EscapeError> {
55325564 unescaped. push ( b'\x1D' ) ;
55335565 unescaped. push ( b'\x56' ) ;
55345566 }
5535- bytes => {
5536- let code = if bytes. starts_with ( b"#x" ) {
5537- parse_hexadecimal ( & bytes[ 2 ..] )
5538- } else if bytes. starts_with ( b"#" ) {
5539- parse_decimal ( & bytes[ 1 ..] )
5567+ bytes if bytes. starts_with ( b"#" ) => {
5568+ let bytes = & bytes[ 1 ..] ;
5569+ let code = if bytes. starts_with ( b"x" ) {
5570+ parse_hexadecimal ( & bytes[ 1 ..] )
55405571 } else {
5541- Err ( EscapeError :: UnrecognizedSymbol (
5542- start + 1 ..end,
5543- String :: from_utf8 ( bytes. to_vec ( ) ) ,
5544- ) )
5572+ parse_decimal ( & bytes)
55455573 } ?;
55465574 if code == 0 {
55475575 return Err ( EscapeError :: EntityWithNull ( start..end) ) ;
55485576 }
55495577 push_utf8 ( unescaped, code) ;
55505578 }
5579+ bytes => match custom_entities. and_then ( |hm| hm. get ( bytes) ) {
5580+ Some ( value) => unescaped. extend_from_slice ( & value) ,
5581+ None => {
5582+ return Err ( EscapeError :: UnrecognizedSymbol (
5583+ start + 1 ..end,
5584+ String :: from_utf8 ( bytes. to_vec ( ) ) ,
5585+ ) )
5586+ }
5587+ } ,
55515588 }
55525589 last_end = end + 1 ;
55535590 }
@@ -5623,6 +5660,23 @@ fn test_unescape() {
56235660 assert_eq ! ( & * unescape( b"<test>" ) . unwrap( ) , b"<test>" ) ;
56245661 assert_eq ! ( & * unescape( b"0" ) . unwrap( ) , b"0" ) ;
56255662 assert_eq ! ( & * unescape( b"0" ) . unwrap( ) , b"0" ) ;
5663+ assert ! ( unescape( b"&foo;" ) . is_err( ) ) ;
5664+ }
5665+
5666+ #[ test]
5667+ fn test_unescape_with ( ) {
5668+ let custom_entities = vec ! [ ( b"foo" . to_vec( ) , b"BAR" . to_vec( ) ) ]
5669+ . into_iter ( )
5670+ . collect ( ) ; // one custom entity: `foo` maps to `BAR`
5671+ assert_eq ! ( & * unescape_with( b"test" , & custom_entities) . unwrap( ) , b"test" ) ; // input without entities comes back unchanged
5672+ assert_eq ! (
5673+ & * unescape_with( b"<test>" , & custom_entities) . unwrap( ) ,
5674+ b"<test>"
5675+ ) ;
5676+ assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
5677+ assert_eq ! ( & * unescape_with( b"0" , & custom_entities) . unwrap( ) , b"0" ) ;
5678+ assert_eq ! ( & * unescape_with( b"&foo;" , & custom_entities) . unwrap( ) , b"BAR" ) ; // entity present in the map is expanded to its value
5679+ assert ! ( unescape_with( b"&fop;" , & custom_entities) . is_err( ) ) ; // entity name absent from the map is still an error
56265680}
56275681
56285682#[ test]
0 commit comments