1717// This makes for some ugly code, but it is faster. Hopefully in the future
1818// with MIR support the compiler will get smarter about this.
1919
20- use std:: { str, slice, char } ;
20+ use std:: { str, slice } ;
2121use object:: Object ;
2222use number:: Number ;
2323use { JsonValue , Error , Result } ;
@@ -31,16 +31,6 @@ const MAX_PRECISION: u64 = 576460752303423500;
3131const DEPTH_LIMIT : usize = 512 ;
3232
3333
34- // Position is only used when we stumble upon an unexpected character. We don't
35- // track lines during parsing, as that would mean doing unnecessary work.
36- // Instead, if an error occurs, we figure out the line and column from the
37- // current index position of the parser.
38- struct Position {
39- pub line : usize ,
40- pub column : usize ,
41- }
42-
43-
4434// The `Parser` struct keeps track of indexing over our buffer. All niceness
4535// has been abandoned in favor of raw pointer magic. Does that make you feel
4636// dirty? _Good._
@@ -91,7 +81,7 @@ macro_rules! expect_sequence {
9181 $(
9282 match expect_byte!( $parser) {
9383 $ch => { } ,
94- ch => return $parser. unexpected_character( ch ) ,
84+ _ => return $parser. unexpected_character( ) ,
9585 }
9686 ) *
9787 }
@@ -131,9 +121,9 @@ macro_rules! expect_eof {
131121 while !$parser. is_eof( ) {
132122 match $parser. read_byte( ) {
133123 9 ... 13 | 32 => $parser. bump( ) ,
134- ch => {
124+ _ => {
135125 $parser. bump( ) ;
136- return $parser. unexpected_character( ch ) ;
126+ return $parser. unexpected_character( ) ;
137127 }
138128 }
139129 }
@@ -147,7 +137,7 @@ macro_rules! expect {
147137 let ch = expect_byte_ignore_whitespace!( $parser) ;
148138
149139 if ch != $byte {
150- return $parser. unexpected_character( ch )
140+ return $parser. unexpected_character( )
151141 }
152142 } ) ;
153143
@@ -158,7 +148,7 @@ macro_rules! expect {
158148 $(
159149 $byte => $then,
160150 ) *
161- _ => return $parser. unexpected_character( ch )
151+ _ => return $parser. unexpected_character( )
162152 }
163153
164154 } )
@@ -222,7 +212,7 @@ macro_rules! expect_string {
222212 break ;
223213 }
224214
225- return $parser. unexpected_character( ch ) ;
215+ return $parser. unexpected_character( ) ;
226216 }
227217
228218 result
@@ -324,7 +314,7 @@ macro_rules! expect_fraction {
324314 }
325315 }
326316 } ,
327- _ => return $parser. unexpected_character( ch )
317+ _ => return $parser. unexpected_character( )
328318 }
329319
330320 loop {
@@ -403,66 +393,28 @@ impl<'a> Parser<'a> {
403393 self . index = self . index . wrapping_add ( 1 ) ;
404394 }
405395
406- // Figure out the `Position` in the source. This doesn't look like it's
407- // very fast - it probably isn't, and it doesn't really have to be.
408- // This method is only called when an unexpected character error occurs.
409- fn source_position_from_index ( & self , index : usize ) -> Position {
410- let ( bytes, _) = self . source . split_at ( index-1 ) ;
411-
412- Position {
413- line : bytes. lines ( ) . count ( ) ,
414- column : bytes. lines ( ) . last ( ) . map_or ( 1 , |line| {
415- line. chars ( ) . count ( ) + 1
416- } )
417- }
418- }
419-
420396 // So we got an unexpected character, now what? Well, figure out where
421397 // it is, and throw an error!
422- fn unexpected_character < T : Sized > ( & mut self , byte : u8 ) -> Result < T > {
423- let pos = self . source_position_from_index ( self . index ) ;
424-
425- // If the first byte is non ASCII (> 127), attempt to read the
426- // codepoint from the following UTF-8 sequence. This can lead
427- // to a fun scenario where an unexpected character error can
428- // produce an end of json or UTF-8 failure error first :).
429- let ch = if byte & 0x80 != 0 {
430- let mut buf = [ byte, 0 , 0 , 0 ] ;
431- let mut len = 0usize ;
432-
433- if byte & 0xE0 == 0xCE {
434- // 2 bytes, 11 bits
435- len = 2 ;
436- buf[ 1 ] = expect_byte ! ( self ) ;
437- } else if byte & 0xF0 == 0xE0 {
438- // 3 bytes, 16 bits
439- len = 3 ;
440- buf[ 1 ] = expect_byte ! ( self ) ;
441- buf[ 2 ] = expect_byte ! ( self ) ;
442- } else if byte & 0xF8 == 0xF0 {
443- // 4 bytes, 21 bits
444- len = 4 ;
445- buf[ 1 ] = expect_byte ! ( self ) ;
446- buf[ 2 ] = expect_byte ! ( self ) ;
447- buf[ 3 ] = expect_byte ! ( self ) ;
448- }
398+ fn unexpected_character < T : Sized > ( & mut self ) -> Result < T > {
399+ let at = self . index - 1 ;
449400
450- let slice = try! (
451- str :: from_utf8 ( & buf [ 0 ..len ] )
452- . map_err ( |_| Error :: FailedUtf8Parsing )
453- ) ;
401+ let ch = self . source [ at.. ]
402+ . chars ( )
403+ . next ( )
404+ . expect ( "Must have a character" ) ;
454405
455- slice. chars ( ) . next ( ) . unwrap ( )
456- } else {
406+ let ( lineno, col) = self . source [ ..at]
407+ . lines ( )
408+ . enumerate ( )
409+ . last ( )
410+ . unwrap_or ( ( 0 , "" ) ) ;
457411
458- // codepoints < 128 are safe ASCII compatibles
459- unsafe { char:: from_u32_unchecked ( byte as u32 ) }
460- } ;
412+ let colno = col. chars ( ) . count ( ) ;
461413
462414 Err ( Error :: UnexpectedCharacter {
463415 ch : ch,
464- line : pos . line ,
465- column : pos . column ,
416+ line : lineno + 1 ,
417+ column : colno + 1 ,
466418 } )
467419 }
468420
@@ -473,7 +425,7 @@ impl<'a> Parser<'a> {
473425 b'0' ... b'9' => ( ch - b'0' ) ,
474426 b'a' ... b'f' => ( ch + 10 - b'a' ) ,
475427 b'A' ... b'F' => ( ch + 10 - b'A' ) ,
476- ch => return self . unexpected_character ( ch ) ,
428+ _ => return self . unexpected_character ( ) ,
477429 } as u32 )
478430 }
479431
@@ -575,11 +527,11 @@ impl<'a> Parser<'a> {
575527 b't' => b'\t' ,
576528 b'r' => b'\r' ,
577529 b'n' => b'\n' ,
578- _ => return self . unexpected_character ( escaped )
530+ _ => return self . unexpected_character ( )
579531 } ;
580532 self . buffer . push ( escaped) ;
581533 } ,
582- _ => return self . unexpected_character ( ch )
534+ _ => return self . unexpected_character ( )
583535 }
584536 ch = expect_byte ! ( self ) ;
585537 }
@@ -656,7 +608,7 @@ impl<'a> Parser<'a> {
656608
657609 let mut e = match ch {
658610 b'0' ... b'9' => ( ch - b'0' ) as i16 ,
659- _ => return self . unexpected_character ( ch ) ,
611+ _ => return self . unexpected_character ( ) ,
660612 } ;
661613
662614 loop {
@@ -708,7 +660,7 @@ impl<'a> Parser<'a> {
708660 let mut object = Object :: with_capacity ( 3 ) ;
709661
710662 if ch != b'"' {
711- return self . unexpected_character ( ch )
663+ return self . unexpected_character ( )
712664 }
713665
714666 object. insert ( expect_string ! ( self ) , JsonValue :: Null ) ;
@@ -733,7 +685,7 @@ impl<'a> Parser<'a> {
733685 JsonValue :: Number ( - match ch {
734686 b'0' => allow_number_extensions ! ( self ) ,
735687 b'1' ... b'9' => expect_number ! ( self , ch) ,
736- _ => return self . unexpected_character ( ch )
688+ _ => return self . unexpected_character ( )
737689 } )
738690 }
739691 b't' => {
@@ -748,7 +700,7 @@ impl<'a> Parser<'a> {
748700 expect_sequence ! ( self , b'u' , b'l' , b'l' ) ;
749701 JsonValue :: Null
750702 } ,
751- _ => return self . unexpected_character ( ch )
703+ _ => return self . unexpected_character ( )
752704 } ;
753705
754706 ' popping: loop {
@@ -776,7 +728,7 @@ impl<'a> Parser<'a> {
776728 value = JsonValue :: Array ( array) ;
777729 continue ' popping;
778730 } ,
779- _ => return self . unexpected_character ( ch )
731+ _ => return self . unexpected_character ( )
780732 }
781733 } ,
782734
@@ -802,7 +754,7 @@ impl<'a> Parser<'a> {
802754
803755 continue ' popping;
804756 } ,
805- _ => return self . unexpected_character ( ch )
757+ _ => return self . unexpected_character ( )
806758 }
807759 } ,
808760 }
0 commit comments