@@ -8,7 +8,7 @@ mod error;
88use core:: fmt;
99use core:: str:: FromStr ;
1010
11- pub use self :: error:: ParseThresholdError ;
11+ pub use self :: error:: { ParseThresholdError , ParseTreeError } ;
1212use crate :: prelude:: * ;
1313use crate :: { errstr, Error , Threshold , MAX_RECURSION_DEPTH } ;
1414
@@ -145,13 +145,107 @@ impl<'a> Tree<'a> {
145145 Self :: from_slice_delim ( sl, 0u32 , '(' )
146146 }
147147
148+ fn parse_pre_check ( s : & str , open : u8 , close : u8 ) -> Result < ( ) , ParseTreeError > {
149+ // First, scan through the string to make sure it is well-formed (valid characters, balanced parens).
150+ let mut max_depth = 0 ;
151+ // Reject any character outside 32..128 first (non-ASCII and control characters); after this
152+ // we can use the *significantly* faster .bytes().enumerate() rather than .char_indices().
153+ for ( pos, ch) in s. char_indices ( ) {
154+ if !( 32 ..128 ) . contains ( & u32:: from ( ch) ) {
155+ return Err ( ParseTreeError :: InvalidCharacter { ch, pos } ) ;
156+ }
157+ }
158+
159+ let mut open_paren_stack = Vec :: with_capacity ( 128 ) ;
160+
161+ for ( pos, ch) in s. bytes ( ) . enumerate ( ) {
162+ if ch == open {
163+ open_paren_stack. push ( ( ch, pos) ) ;
164+ if max_depth < open_paren_stack. len ( ) {
165+ max_depth = open_paren_stack. len ( ) ;
166+ }
167+ } else if ch == close {
168+ if let Some ( ( open_ch, open_pos) ) = open_paren_stack. pop ( ) {
169+ if ( open_ch == b'(' && ch == b'}' ) || ( open_ch == b'{' && ch == b')' ) {
170+ return Err ( ParseTreeError :: MismatchedParens {
171+ open_ch : open_ch. into ( ) ,
172+ open_pos,
173+ close_ch : ch. into ( ) ,
174+ close_pos : pos,
175+ } ) ;
176+ }
177+
178+ if let Some ( & ( paren_ch, paren_pos) ) = open_paren_stack. last ( ) {
179+ // An enclosing paren is still open, so this should not be the end of the string,
180+ // and the next character should be a , ) or }.
181+ if pos == s. len ( ) - 1 {
182+ return Err ( ParseTreeError :: UnmatchedOpenParen {
183+ ch : paren_ch. into ( ) ,
184+ pos : paren_pos,
185+ } ) ;
186+ } else {
187+ let next_byte = s. as_bytes ( ) [ pos + 1 ] ;
188+ if next_byte != b')' && next_byte != b'}' && next_byte != b',' {
189+ return Err ( ParseTreeError :: ExpectedParenOrComma {
190+ ch : next_byte. into ( ) ,
191+ pos : pos + 1 ,
192+ } ) ;
193+ //
194+ }
195+ }
196+ } else {
197+ // No enclosing paren remains open; this SHOULD be the end of the string
198+ if pos < s. len ( ) - 1 {
199+ return Err ( ParseTreeError :: TrailingCharacter {
200+ ch : s. as_bytes ( ) [ pos + 1 ] . into ( ) ,
201+ pos : pos + 1 ,
202+ } ) ;
203+ }
204+ }
205+ } else {
206+ // In practice, this is only hit if there are no open parens at all.
207+ // If there are open parens, like in "())", then on the first ), we
208+ // would have returned TrailingCharacter in the previous clause.
209+ //
210+ // From a user point of view, UnmatchedCloseParen would probably be
211+ // a clearer error to get in that case too, but it complicates the
212+ // parser to do this, and "TrailingCharacter" is technically correct,
213+ // so we leave it for now.
214+ return Err ( ParseTreeError :: UnmatchedCloseParen { ch : ch. into ( ) , pos } ) ;
215+ }
216+ } else if ch == b',' && open_paren_stack. is_empty ( ) {
217+ // We consider commas outside of the tree to be "trailing characters"
218+ return Err ( ParseTreeError :: TrailingCharacter { ch : ch. into ( ) , pos } ) ;
219+ }
220+ }
221+ // Catch "early end of string": report the innermost paren that was never closed
222+ if let Some ( ( ch, pos) ) = open_paren_stack. pop ( ) {
223+ return Err ( ParseTreeError :: UnmatchedOpenParen { ch : ch. into ( ) , pos } ) ;
224+ }
225+
226+ // FIXME should be able to remove this once we eliminate all recursion
227+ // in the library.
228+ if u32:: try_from ( max_depth) . unwrap_or ( u32:: MAX ) > MAX_RECURSION_DEPTH {
229+ return Err ( ParseTreeError :: MaxRecursionDepthExceeded {
230+ actual : max_depth,
231+ maximum : MAX_RECURSION_DEPTH ,
232+ } ) ;
233+ }
234+
235+ Ok ( ( ) )
236+ }
237+
148238 pub ( crate ) fn from_slice_delim (
149239 mut sl : & ' a str ,
150240 depth : u32 ,
151241 delim : char ,
152242 ) -> Result < ( Tree < ' a > , & ' a str ) , Error > {
153- if depth >= MAX_RECURSION_DEPTH {
154- return Err ( Error :: MaxRecursiveDepthExceeded ) ;
243+ if depth == 0 {
244+ if delim == '{' {
245+ Self :: parse_pre_check ( sl, b'{' , b'}' ) . map_err ( Error :: ParseTree ) ?;
246+ } else {
247+ Self :: parse_pre_check ( sl, b'(' , b')' ) . map_err ( Error :: ParseTree ) ?;
248+ }
155249 }
156250
157251 match next_expr ( sl, delim) {
@@ -171,7 +265,7 @@ impl<'a> Tree<'a> {
171265 ret. args . push ( arg) ;
172266
173267 if new_sl. is_empty ( ) {
174- return Err ( Error :: ExpectedChar ( closing_delim ( delim ) ) ) ;
268+ unreachable ! ( )
175269 }
176270
177271 sl = & new_sl[ 1 ..] ;
@@ -181,7 +275,7 @@ impl<'a> Tree<'a> {
181275 if last_byte == closing_delim ( delim) as u8 {
182276 break ;
183277 } else {
184- return Err ( Error :: ExpectedChar ( closing_delim ( delim ) ) ) ;
278+ unreachable ! ( )
185279 }
186280 }
187281 }
@@ -200,7 +294,7 @@ impl<'a> Tree<'a> {
200294 if rem. is_empty ( ) {
201295 Ok ( top)
202296 } else {
203- Err ( errstr ( rem ) )
297+ unreachable ! ( )
204298 }
205299 }
206300
@@ -337,36 +431,88 @@ mod tests {
337431 fn parse_tree_basic ( ) {
338432 assert_eq ! ( Tree :: from_str( "thresh" ) . unwrap( ) , leaf( "thresh" ) ) ;
339433
340- assert ! ( matches!( Tree :: from_str( "thresh," ) , Err ( Error :: Unexpected ( s) ) if s == "," ) ) ;
434+ assert ! ( matches!(
435+ Tree :: from_str( "thresh," ) . unwrap_err( ) ,
436+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: ',' , pos: 6 } ) ,
437+ ) ) ;
341438
342439 assert ! ( matches!(
343- Tree :: from_str( "thresh,thresh" ) ,
344- Err ( Error :: Unexpected ( s ) ) if s == ",thresh" ,
440+ Tree :: from_str( "thresh,thresh" ) . unwrap_err ( ) ,
441+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch : ',' , pos : 6 } ) ,
345442 ) ) ;
346443
347444 assert ! ( matches!(
348- Tree :: from_str( "thresh()thresh()" ) ,
349- Err ( Error :: Unexpected ( s ) ) if s == "thresh()" ,
445+ Tree :: from_str( "thresh()thresh()" ) . unwrap_err ( ) ,
446+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch : 't' , pos : 8 } ) ,
350447 ) ) ;
351448
352449 assert_eq ! ( Tree :: from_str( "thresh()" ) . unwrap( ) , paren_node( "thresh" , vec![ leaf( "" ) ] ) ) ;
353450
354- // FIXME even for our current crappy error handling, this one is pretty bad
355- assert ! ( matches!( Tree :: from_str( "thresh(a()b)" ) , Err ( Error :: ExpectedChar ( ')' ) ) ) ) ;
451+ assert ! ( matches!(
452+ Tree :: from_str( "thresh(a()b)" ) ,
453+ Err ( Error :: ParseTree ( ParseTreeError :: ExpectedParenOrComma { ch: 'b' , pos: 10 } ) ) ,
454+ ) ) ;
356455
357- assert ! ( matches!( Tree :: from_str( "thresh()xyz" ) , Err ( Error :: Unexpected ( s) ) if s == "xyz" ) ) ;
456+ assert ! ( matches!(
457+ Tree :: from_str( "thresh()xyz" ) ,
458+ Err ( Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: 'x' , pos: 8 } ) ) ,
459+ ) ) ;
358460 }
359461
360462 #[ test]
361463 fn parse_tree_parens ( ) {
362- assert ! ( matches!( Tree :: from_str( "a(" ) , Err ( Error :: ExpectedChar ( ')' ) ) ) ) ;
464+ assert ! ( matches!(
465+ Tree :: from_str( "a(" ) . unwrap_err( ) ,
466+ Error :: ParseTree ( ParseTreeError :: UnmatchedOpenParen { ch: '(' , pos: 1 } ) ,
467+ ) ) ;
468+
469+ assert ! ( matches!(
470+ Tree :: from_str( ")" ) . unwrap_err( ) ,
471+ Error :: ParseTree ( ParseTreeError :: UnmatchedCloseParen { ch: ')' , pos: 0 } ) ,
472+ ) ) ;
473+
474+ assert ! ( matches!(
475+ Tree :: from_str( "x(y))" ) . unwrap_err( ) ,
476+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: ')' , pos: 4 } ) ,
477+ ) ) ;
478+
479+ /* Will be enabled in a later PR which unifies TR and non-TR parsing.
480+ assert!(matches!(
481+ Tree::from_str("a{").unwrap_err(),
482+ Error::ParseTree(ParseTreeError::UnmatchedOpenParen { ch: '{', pos: 1 }),
483+ ));
484+
485+ assert!(matches!(
486+ Tree::from_str("}").unwrap_err(),
487+ Error::ParseTree(ParseTreeError::UnmatchedCloseParen { ch: '}', pos: 0 }),
488+ ));
489+ */
363490
364- assert ! ( matches!( Tree :: from_str( ")" ) , Err ( Error :: Unexpected ( s) ) if s == ")" ) ) ;
491+ assert ! ( matches!(
492+ Tree :: from_str( "x(y)}" ) . unwrap_err( ) ,
493+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: '}' , pos: 4 } ) ,
494+ ) ) ;
365495
366- assert ! ( matches!( Tree :: from_str( "x(y))" ) , Err ( Error :: Unexpected ( s) ) if s == ")" ) ) ;
496+ /* Will be enabled in a later PR which unifies TR and non-TR parsing.
497+ assert!(matches!(
498+ Tree::from_str("x{y)").unwrap_err(),
499+ Error::ParseTree(ParseTreeError::MismatchedParens {
500+ open_ch: '{',
501+ open_pos: 1,
502+ close_ch: ')',
503+ close_pos: 3,
504+ }),
505+ ));
506+ */
507+ }
367508
368- // In next commit will add tests related to {}s; currently we ignore
369- // these except in Taproot mode.
509+ #[ test]
510+ fn parse_tree_taproot ( ) {
511+ // The expected error implies { and } are not treated as delimiters by from_str: the ")" at position 5 closes the only open paren, so the "," after it is trailing. This test will change in a later PR which unifies TR and non-TR parsing.
512+ assert ! ( matches!(
513+ Tree :: from_str( "a{b(c),d}" ) . unwrap_err( ) ,
514+ Error :: ParseTree ( ParseTreeError :: TrailingCharacter { ch: ',' , pos: 6 } ) ,
515+ ) ) ;
370516 }
371517
372518 #[ test]
0 commit comments