@@ -969,108 +969,12 @@ impl<'a> StringReader<'a> {
969969 'b' => {
970970 self . bump ( ) ;
971971 return match self . curr {
972- Some ( '\'' ) => parse_byte ( self ) ,
973- Some ( '"' ) => parse_byte_string ( self ) ,
974- Some ( 'r' ) => parse_raw_byte_string ( self ) ,
972+ Some ( '\'' ) => self . scan_byte ( ) ,
973+ Some ( '"' ) => self . scan_byte_string ( ) ,
974+ Some ( 'r' ) => self . scan_raw_byte_string ( ) ,
975975 _ => unreachable ! ( ) // Should have been a token::IDENT above.
976976 } ;
977977
978- fn parse_byte ( self_ : & mut StringReader ) -> token:: Token {
979- self_. bump ( ) ;
980- let start = self_. last_pos ;
981-
982- // the eof will be picked up by the final `'` check below
983- let mut c2 = self_. curr . unwrap_or ( '\x00' ) ;
984- self_. bump ( ) ;
985-
986- c2 = self_. scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
987- if !self_. curr_is ( '\'' ) {
988- // Byte offsetting here is okay because the
989- // character before position `start` are an
990- // ascii single quote and ascii 'b'.
991- let last_pos = self_. last_pos ;
992- self_. fatal_span_verbose (
993- start - BytePos ( 2 ) , last_pos,
994- "unterminated byte constant" . to_string ( ) ) ;
995- }
996- self_. bump ( ) ; // advance curr past token
997- return token:: LIT_BYTE ( c2 as u8 ) ;
998- }
999-
1000- fn parse_byte_string ( self_ : & mut StringReader ) -> token:: Token {
1001- self_. bump ( ) ;
1002- let start = self_. last_pos ;
1003- let mut value = Vec :: new ( ) ;
1004- while !self_. curr_is ( '"' ) {
1005- if self_. is_eof ( ) {
1006- let last_pos = self_. last_pos ;
1007- self_. fatal_span_ ( start, last_pos,
1008- "unterminated double quote byte string" ) ;
1009- }
1010-
1011- let ch_start = self_. last_pos ;
1012- let ch = self_. curr . unwrap ( ) ;
1013- self_. bump ( ) ;
1014- self_. scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1015- . map ( |ch| value. push ( ch as u8 ) ) ;
1016- }
1017- self_. bump ( ) ;
1018- return token:: LIT_BINARY ( Rc :: new ( value) ) ;
1019- }
1020-
1021- fn parse_raw_byte_string ( self_ : & mut StringReader ) -> token:: Token {
1022- let start_bpos = self_. last_pos ;
1023- self_. bump ( ) ;
1024- let mut hash_count = 0 u;
1025- while self_. curr_is ( '#' ) {
1026- self_. bump ( ) ;
1027- hash_count += 1 ;
1028- }
1029-
1030- if self_. is_eof ( ) {
1031- let last_pos = self_. last_pos ;
1032- self_. fatal_span_ ( start_bpos, last_pos, "unterminated raw string" ) ;
1033- } else if !self_. curr_is ( '"' ) {
1034- let last_pos = self_. last_pos ;
1035- let ch = self_. curr . unwrap ( ) ;
1036- self_. fatal_span_char ( start_bpos, last_pos,
1037- "only `#` is allowed in raw string delimitation; \
1038- found illegal character",
1039- ch) ;
1040- }
1041- self_. bump ( ) ;
1042- let content_start_bpos = self_. last_pos ;
1043- let mut content_end_bpos;
1044- ' outer: loop {
1045- match self_. curr {
1046- None => {
1047- let last_pos = self_. last_pos ;
1048- self_. fatal_span_ ( start_bpos, last_pos, "unterminated raw string" )
1049- } ,
1050- Some ( '"' ) => {
1051- content_end_bpos = self_. last_pos ;
1052- for _ in range ( 0 , hash_count) {
1053- self_. bump ( ) ;
1054- if !self_. curr_is ( '#' ) {
1055- continue ' outer;
1056- }
1057- }
1058- break ;
1059- } ,
1060- Some ( c) => if c > '\x7F' {
1061- let last_pos = self_. last_pos ;
1062- self_. err_span_char (
1063- last_pos, last_pos, "raw byte string must be ASCII" , c) ;
1064- }
1065- }
1066- self_. bump ( ) ;
1067- }
1068- self_. bump ( ) ;
1069- let bytes = self_. with_str_from_to ( content_start_bpos,
1070- content_end_bpos,
1071- |s| s. as_bytes ( ) . to_owned ( ) ) ;
1072- return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1073- }
1074978 }
1075979 '"' => {
1076980 let mut accum_str = String :: new ( ) ;
@@ -1221,6 +1125,103 @@ impl<'a> StringReader<'a> {
12211125 // consider shebangs comments, but not inner attributes
12221126 || ( self . curr_is ( '#' ) && self . nextch_is ( '!' ) && !self . nextnextch_is ( '[' ) )
12231127 }
1128+
1129+ fn scan_byte ( & mut self ) -> token:: Token {
1130+ self . bump ( ) ;
1131+ let start = self . last_pos ;
1132+
1133+ // the eof will be picked up by the final `'` check below
1134+ let mut c2 = self . curr . unwrap_or ( '\x00' ) ;
1135+ self . bump ( ) ;
1136+
1137+ c2 = self . scan_char_or_byte ( start, c2, /* ascii_only = */ true , '\'' ) . unwrap ( ) ;
1138+ if !self . curr_is ( '\'' ) {
1139+ // Byte offsetting here is okay because the
1140+ // character before position `start` are an
1141+ // ascii single quote and ascii 'b'.
1142+ let last_pos = self . last_pos ;
1143+ self . fatal_span_verbose (
1144+ start - BytePos ( 2 ) , last_pos,
1145+ "unterminated byte constant" . to_string ( ) ) ;
1146+ }
1147+ self . bump ( ) ; // advance curr past token
1148+ return token:: LIT_BYTE ( c2 as u8 ) ;
1149+ }
1150+
1151+ fn scan_byte_string ( & mut self ) -> token:: Token {
1152+ self . bump ( ) ;
1153+ let start = self . last_pos ;
1154+ let mut value = Vec :: new ( ) ;
1155+ while !self . curr_is ( '"' ) {
1156+ if self . is_eof ( ) {
1157+ let last_pos = self . last_pos ;
1158+ self . fatal_span_ ( start, last_pos,
1159+ "unterminated double quote byte string" ) ;
1160+ }
1161+
1162+ let ch_start = self . last_pos ;
1163+ let ch = self . curr . unwrap ( ) ;
1164+ self . bump ( ) ;
1165+ self . scan_char_or_byte ( ch_start, ch, /* ascii_only = */ true , '"' )
1166+ . map ( |ch| value. push ( ch as u8 ) ) ;
1167+ }
1168+ self . bump ( ) ;
1169+ return token:: LIT_BINARY ( Rc :: new ( value) ) ;
1170+ }
1171+
1172+ fn scan_raw_byte_string ( & mut self ) -> token:: Token {
1173+ let start_bpos = self . last_pos ;
1174+ self . bump ( ) ;
1175+ let mut hash_count = 0 u;
1176+ while self . curr_is ( '#' ) {
1177+ self . bump ( ) ;
1178+ hash_count += 1 ;
1179+ }
1180+
1181+ if self . is_eof ( ) {
1182+ let last_pos = self . last_pos ;
1183+ self . fatal_span_ ( start_bpos, last_pos, "unterminated raw string" ) ;
1184+ } else if !self . curr_is ( '"' ) {
1185+ let last_pos = self . last_pos ;
1186+ let ch = self . curr . unwrap ( ) ;
1187+ self . fatal_span_char ( start_bpos, last_pos,
1188+ "only `#` is allowed in raw string delimitation; \
1189+ found illegal character",
1190+ ch) ;
1191+ }
1192+ self . bump ( ) ;
1193+ let content_start_bpos = self . last_pos ;
1194+ let mut content_end_bpos;
1195+ ' outer: loop {
1196+ match self . curr {
1197+ None => {
1198+ let last_pos = self . last_pos ;
1199+ self . fatal_span_ ( start_bpos, last_pos, "unterminated raw string" )
1200+ } ,
1201+ Some ( '"' ) => {
1202+ content_end_bpos = self . last_pos ;
1203+ for _ in range ( 0 , hash_count) {
1204+ self . bump ( ) ;
1205+ if !self . curr_is ( '#' ) {
1206+ continue ' outer;
1207+ }
1208+ }
1209+ break ;
1210+ } ,
1211+ Some ( c) => if c > '\x7F' {
1212+ let last_pos = self . last_pos ;
1213+ self . err_span_char (
1214+ last_pos, last_pos, "raw byte string must be ASCII" , c) ;
1215+ }
1216+ }
1217+ self . bump ( ) ;
1218+ }
1219+ self . bump ( ) ;
1220+ let bytes = self . with_str_from_to ( content_start_bpos,
1221+ content_end_bpos,
1222+ |s| s. as_bytes ( ) . to_owned ( ) ) ;
1223+ return token:: LIT_BINARY_RAW ( Rc :: new ( bytes) , hash_count) ;
1224+ }
12241225}
12251226
12261227pub fn is_whitespace ( c : Option < char > ) -> bool {
0 commit comments