@@ -685,7 +685,7 @@ impl<'a> StringReader<'a> {
685685 }
686686
687687
688- fn scan_numeric_escape ( & mut self , n_hex_digits : uint , delim : char ) -> char {
688+ fn scan_numeric_escape ( & mut self , n_hex_digits : uint , delim : char ) -> bool {
689689 let mut accum_int = 0u32 ;
690690 let start_bpos = self . last_pos ;
691691 for _ in range ( 0 , n_hex_digits) {
@@ -709,20 +709,22 @@ impl<'a> StringReader<'a> {
709709 }
710710
711711 match char:: from_u32 ( accum_int) {
712- Some ( x ) => x ,
712+ Some ( _ ) => true ,
713713 None => {
714714 let last_bpos = self . last_pos ;
715715 self . err_span_ ( start_bpos, last_bpos, "illegal numeric character escape" ) ;
716- '?'
716+ false
717717 }
718718 }
719719 }
720720
721721 /// Scan for a single (possibly escaped) byte or char
722722 /// in a byte, (non-raw) byte string, char, or (non-raw) string literal.
723723 /// `start` is the position of `first_source_char`, which is already consumed.
724+ ///
725+ /// Returns true if there was a valid char/byte, false otherwise.
724726 fn scan_char_or_byte ( & mut self , start : BytePos , first_source_char : char ,
725- ascii_only : bool , delim : char ) -> Option < char > {
727+ ascii_only : bool , delim : char ) -> bool {
726728 match first_source_char {
727729 '\\' => {
728730 // '\X' for some X must be a character constant:
@@ -732,24 +734,18 @@ impl<'a> StringReader<'a> {
732734 match escaped {
733735 None => { } , // EOF here is an error that will be checked later.
734736 Some ( e) => {
735- return Some ( match e {
736- 'n' => '\n' ,
737- 'r' => '\r' ,
738- 't' => '\t' ,
739- '\\' => '\\' ,
740- '\'' => '\'' ,
741- '"' => '"' ,
742- '0' => '\x00' ,
737+ return match e {
738+ 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true ,
743739 'x' => self . scan_numeric_escape ( 2 u, delim) ,
744740 'u' if !ascii_only => self . scan_numeric_escape ( 4 u, delim) ,
745741 'U' if !ascii_only => self . scan_numeric_escape ( 8 u, delim) ,
746742 '\n' if delim == '"' => {
747743 self . consume_whitespace ( ) ;
748- return None
744+ true
749745 } ,
750746 '\r' if delim == '"' && self . curr_is ( '\n' ) => {
751747 self . consume_whitespace ( ) ;
752- return None
748+ true
753749 }
754750 c => {
755751 let last_pos = self . last_pos ;
@@ -758,9 +754,9 @@ impl<'a> StringReader<'a> {
758754 if ascii_only { "unknown byte escape" }
759755 else { "unknown character escape" } ,
760756 c) ;
761- c
757+ false
762758 }
763- } )
759+ }
764760 }
765761 }
766762 }
@@ -771,14 +767,16 @@ impl<'a> StringReader<'a> {
771767 if ascii_only { "byte constant must be escaped" }
772768 else { "character constant must be escaped" } ,
773769 first_source_char) ;
770+ return false ;
774771 }
775772 '\r' => {
776773 if self . curr_is ( '\n' ) {
777774 self . bump ( ) ;
778- return Some ( '\n' ) ;
775+ return true ;
779776 } else {
780777 self . err_span_ ( start, self . last_pos ,
781778 "bare CR not allowed in string, use \\ r instead" ) ;
779+ return false ;
782780 }
783781 }
784782 _ => if ascii_only && first_source_char > '\x7F' {
@@ -787,9 +785,10 @@ impl<'a> StringReader<'a> {
787785 start, last_pos,
788786 "byte constant must be ASCII. \
789787 Use a \\ xHH escape for a non-ASCII byte", first_source_char) ;
788+ return false ;
790789 }
791790 }
792- Some ( first_source_char )
791+ true
793792 }
794793
795794 fn binop ( & mut self , op : token:: BinOp ) -> token:: Token {
@@ -924,7 +923,7 @@ impl<'a> StringReader<'a> {
924923 let start = self . last_pos ;
925924
926925 // the eof will be picked up by the final `'` check below
927- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
926+ let c2 = self . curr . unwrap_or ( '\x00' ) ;
928927 self . bump ( ) ;
929928
930929 // If the character is an ident start not followed by another single
@@ -967,7 +966,7 @@ impl<'a> StringReader<'a> {
967966 }
968967
969968 // Otherwise it is a character constant:
970- c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) . unwrap ( ) ;
969+ let valid = self . scan_char_or_byte ( start, c2, /* ascii_only = */ false , '\'' ) ;
971970 if !self . curr_is ( '\'' ) {
972971 let last_bpos = self . last_pos ;
973972 self . fatal_span_verbose (
@@ -977,8 +976,9 @@ impl<'a> StringReader<'a> {
977976 start - BytePos ( 1 ) , last_bpos,
978977 "unterminated character constant" . to_string ( ) ) ;
979978 }
979+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "0" ) } ;
980980 self . bump ( ) ; // advance curr past token
981- return token:: LIT_CHAR ( c2 ) ;
981+ return token:: LIT_CHAR ( id ) ;
982982 }
983983 'b' => {
984984 self . bump ( ) ;
@@ -991,8 +991,8 @@ impl<'a> StringReader<'a> {
991991
992992 }
993993 '"' => {
994- let mut accum_str = String :: new ( ) ;
995994 let start_bpos = self . last_pos ;
995+ let mut valid = true ;
996996 self . bump ( ) ;
997997 while !self . curr_is ( '"' ) {
998998 if self . is_eof ( ) {
@@ -1003,11 +1003,13 @@ impl<'a> StringReader<'a> {
10031003 let ch_start = self . last_pos ;
10041004 let ch = self . curr . unwrap ( ) ;
10051005 self . bump ( ) ;
1006- self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' )
1007- . map ( |ch| accum_str. push_char ( ch) ) ;
1006+ valid &= self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ false , '"' ) ;
10081007 }
1008+ // adjust for the ASCII " at the start of the literal
1009+ let id = if valid { self . ident_from ( start_bpos + BytePos ( 1 ) ) }
1010+ else { str_to_ident ( "??" ) } ;
10091011 self . bump ( ) ;
1010- return token:: LIT_STR ( str_to_ident ( accum_str . as_slice ( ) ) ) ;
1012+ return token:: LIT_STR ( id ) ;
10111013 }
10121014 'r' => {
10131015 let start_bpos = self . last_pos ;
@@ -1032,7 +1034,7 @@ impl<'a> StringReader<'a> {
10321034 self . bump ( ) ;
10331035 let content_start_bpos = self . last_pos ;
10341036 let mut content_end_bpos;
1035- let mut has_cr = false ;
1037+ let mut valid = true ;
10361038 ' outer: loop {
10371039 if self . is_eof ( ) {
10381040 let last_bpos = self . last_pos ;
@@ -1055,23 +1057,26 @@ impl<'a> StringReader<'a> {
10551057 }
10561058 }
10571059 break ;
1058- }
1060+ } ,
10591061 '\r' => {
1060- has_cr = true ;
1062+ if !self . nextch_is ( '\n' ) {
1063+ let last_bpos = self . last_pos ;
1064+ self . err_span_ ( start_bpos, last_bpos, "bare CR not allowed in raw \
1065+ string, use \\ r instead") ;
1066+ valid = false ;
1067+ }
10611068 }
10621069 _ => ( )
10631070 }
10641071 self . bump ( ) ;
10651072 }
10661073 self . bump ( ) ;
1067- let str_content = self . with_str_from_to ( content_start_bpos, content_end_bpos, |string| {
1068- let string = if has_cr {
1069- self . translate_crlf ( content_start_bpos, string,
1070- "bare CR not allowed in raw string" )
1071- } else { string. into_maybe_owned ( ) } ;
1072- str_to_ident ( string. as_slice ( ) )
1073- } ) ;
1074- return token:: LIT_STR_RAW ( str_content, hash_count) ;
1074+ let id = if valid {
1075+ self . ident_from_to ( content_start_bpos, content_end_bpos)
1076+ } else {
1077+ str_to_ident ( "??" )
1078+ } ;
1079+ return token:: LIT_STR_RAW ( id, hash_count) ;
10751080 }
10761081 '-' => {
10771082 if self . nextch_is ( '>' ) {
@@ -1145,10 +1150,10 @@ impl<'a> StringReader<'a> {
11451150 let start = self . last_pos ;
11461151
11471152 // the eof will be picked up by the final `'` check below
1148- let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
1153+ let c2 = self . curr . unwrap_or ( '\x00' ) ;
11491154 self . bump ( ) ;
11501155
1151- c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
1156+ let valid = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) ;
11521157 if !self . curr_is ( '\'' ) {
11531158 // Byte offsetting here is okay because the
11541159 // characters before position `start` are an
@@ -1158,14 +1163,17 @@ impl<'a> StringReader<'a> {
11581163 start - BytePos ( 2 ) , last_pos,
11591164 "unterminated byte constant" . to_string ( ) ) ;
11601165 }
1166+
1167+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "??" ) } ;
11611168 self . bump ( ) ; // advance curr past token
1162- return token:: LIT_BYTE ( c2 as u8 ) ;
1169+ return token:: LIT_BYTE ( id ) ;
11631170 }
11641171
11651172 fn scan_byte_string ( & mut self ) -> token:: Token {
11661173 self . bump ( ) ;
11671174 let start = self . last_pos ;
1168- let mut value = Vec :: new ( ) ;
1175+ let mut valid = true ;
1176+
11691177 while !self . curr_is ( '"' ) {
11701178 if self . is_eof ( ) {
11711179 let last_pos = self . last_pos ;
@@ -1176,11 +1184,11 @@ impl<'a> StringReader<'a> {
11761184 let ch_start = self . last_pos ;
11771185 let ch = self . curr . unwrap ( ) ;
11781186 self . bump ( ) ;
1179- self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1180- . map ( |ch| value. push ( ch as u8 ) ) ;
1187+ valid &= self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' ) ;
11811188 }
1189+ let id = if valid { self . ident_from ( start) } else { str_to_ident ( "??" ) } ;
11821190 self . bump ( ) ;
1183- return token:: LIT_BINARY ( Rc :: new ( value ) ) ;
1191+ return token:: LIT_BINARY ( id ) ;
11841192 }
11851193
11861194 fn scan_raw_byte_string ( & mut self ) -> token:: Token {
@@ -1231,10 +1239,8 @@ impl<'a> StringReader<'a> {
12311239 self . bump ( ) ;
12321240 }
12331241 self . bump ( ) ;
1234- let bytes = self . with_str_from_to ( content_start_bpos,
1235- content_end_bpos,
1236- |s| s. as_bytes ( ) . to_owned ( ) ) ;
1237- return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1242+ return token:: LIT_BINARY_RAW ( self . ident_from_to ( content_start_bpos, content_end_bpos) ,
1243+ hash_count) ;
12381244 }
12391245}
12401246
0 commit comments