diff --git a/src/cli/diagnostics.rs b/src/cli/diagnostics.rs index 1ac87bc..9708bda 100644 --- a/src/cli/diagnostics.rs +++ b/src/cli/diagnostics.rs @@ -1,7 +1,7 @@ use annotate_snippets as ann; use rasur::{ error::{Error, InvalidScalarPlace}, - lexer::IdentKind, + lexer::{IdentKind, IdentMode}, parser::ExpectedFragment, span::Span, token::{Repr, Token, TokenKind}, @@ -81,12 +81,18 @@ fn convert(error: Error, cx: &RenderCx<'_>) -> Diag { Error::UnknownBuiltinSyntax(span) => Diag::new("unknown built-in syntax").highlight(span), Error::InvalidLetChain(span) => Diag::new("invalid let-chain").highlight(span), Error::ReuseInherentImpl => Diag::new("inherent impls cannot be reused"), - Error::InvalidRawIdent(IdentKind::Normal, span) => { + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, span) => { Diag::new("invalid raw identifier").highlight(span) } - Error::InvalidRawIdent(IdentKind::Ticked, span) => { + Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, span) => { + Diag::new("invalid stropped keyword").highlight(span) + } + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, span) => { Diag::new("invalid raw ticked identifier").highlight(span) } + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, span) => { + Diag::new("invalid stropped ticked keyword").highlight(span) + } Error::UnterminatedBlockComment(span) => { Diag::new("unterminated block comment").highlight(span) } diff --git a/src/lib/error.rs b/src/lib/error.rs index b5c7cf8..2c13704 100644 --- a/src/lib/error.rs +++ b/src/lib/error.rs @@ -1,4 +1,9 @@ -use crate::{lexer::IdentKind, parser::ExpectedFragment, span::Span, token::Token}; +use crate::{ + lexer::{IdentKind, IdentMode}, + parser::ExpectedFragment, + span::Span, + token::Token, +}; use Default::default; use std::cell::RefCell; @@ -79,9 +84,9 @@ pub enum Error { InvalidNumericIdent(Span), InvalidOpAfterCast(Span), InvalidParenthesizedBound, - InvalidRawIdent(IdentKind, Span), InvalidScalar(char, InvalidScalarPlace, Span), 
InvalidStrLitDelimiter(Span), + InvalidIdent(IdentKind, IdentMode, Span), InvalidTyPrefix(Span), LifetimeObjectTyWithoutPlus(Span), MisplacedReceiver(Span), diff --git a/src/lib/lexer.rs b/src/lib/lexer.rs index 77d16d3..7988d39 100644 --- a/src/lib/lexer.rs +++ b/src/lib/lexer.rs @@ -444,20 +444,30 @@ impl<'err, 'src> Lexer<'err, 'src> { return TokenKind::CharLit; } Some('#') if self.edition >= Edition::Rust2021 => { - if self.source(unticked) != "r" { - self.error(Error::ReservedPrefix(self.span(unticked))); - self.advance(); // `#` - return TokenKind::Error; - } - self.advance(); - - return match self.fin_lex_raw_ident(IdentKind::Ticked, start) { - Some(()) => TokenKind::TickedIdent, - None => { - self.error(Error::InvalidRawIdent(IdentKind::Ticked, self.span(start))); - TokenKind::Error + let mode = match self.source(unticked) { + // FEATURE: XXX + "k" => IdentMode::Keyword, + "r" => IdentMode::Raw, + _ => { + self.error(Error::ReservedPrefix(self.span(unticked))); + self.advance(); // `#` + return TokenKind::Error; } }; + self.advance(); + + return self.fin_lex_prefixed_ident( + IdentKind::Ticked, + mode, + None, + |ident, mode| match (mode, ident) { + | (IdentMode::Keyword, TokenKind::CommonIdent) + | (IdentMode::Raw, PathSegKeyword!() | TokenKind::Underscore) => false, + _ => true, + }, + |_| TokenKind::TickedIdent, + start, + ); } _ => return TokenKind::TickedIdent, } @@ -578,13 +588,30 @@ impl<'err, 'src> Lexer<'err, 'src> { self.advance(); return self.fin_lex_raw_guarded_str_lit(start); } + // FEATURE: XXX + ("k", Some('#')) if self.edition >= Edition::Rust2021 => { + self.advance(); + + return self.fin_lex_prefixed_ident( + IdentKind::Normal, + IdentMode::Keyword, + None, + |ident, _| ident != TokenKind::CommonIdent, + std::convert::identity, + start, + ); + } ("r", Some('#')) => { self.advance(); - return match self.fin_lex_raw_ident(IdentKind::Normal, start) { - Some(()) => TokenKind::CommonIdent, - None => self.fin_lex_raw_guarded_str_lit(start), - }; 
+ return self.fin_lex_prefixed_ident( + IdentKind::Normal, + IdentMode::Raw, + Some(|this, start| this.fin_lex_raw_guarded_str_lit(start)), + |ident, _| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore), + |_| TokenKind::CommonIdent, + start, + ); } (_, Some(char @ ('"' | '\'' | '#'))) if self.edition >= Edition::Rust2021 => { self.error(Error::ReservedPrefix(self.span(start))); @@ -599,22 +626,36 @@ impl<'err, 'src> Lexer<'err, 'src> { self.fin_lex_str_lit(raw, flavor, start) } - fn fin_lex_raw_ident(&mut self, kind: IdentKind, start: ByteIndex) -> Option<()> { + fn fin_lex_prefixed_ident( + &mut self, + kind: IdentKind, + mode: IdentMode, + fallback: Option<fn(&mut Self, ByteIndex) -> TokenKind>, + validate: fn(TokenKind, IdentMode) -> bool, + map: fn(TokenKind) -> TokenKind, + start: ByteIndex, + ) -> TokenKind { if !self.peek().is_some_and(is_ident_start) { - return None; + return match fallback { + Some(fallback) => fallback(self, start), + None => { + self.error(Error::InvalidIdent(kind, mode, self.span(start))); + TokenKind::Error + } + }; } let unprefixed = self.index(); self.advance(); self.advance_while(is_ident_middle); - if let PathSegKeyword!() | TokenKind::Underscore = - lex_ident(self.source(unprefixed), self.edition) - { - self.error(Error::InvalidRawIdent(kind, self.span(start))); + let ident = lex_ident(self.source(unprefixed), self.edition); + + if !validate(ident, mode) { + self.error(Error::InvalidIdent(kind, mode, self.span(start))); } - Some(()) + map(ident) } // FIXME: Consolidate with `fin_lex_str_lit` smh @@ -775,6 +816,12 @@ pub enum IdentKind { Ticked, } +#[derive(Clone, Copy, Debug)] +pub enum IdentMode { + Raw, + Keyword, +} + #[derive(Clone, Copy)] enum TextLitKind { Str, diff --git a/src/lib/parser.rs b/src/lib/parser.rs index 5a9a9c6..fb2280c 100644 --- a/src/lib/parser.rs +++ b/src/lib/parser.rs @@ -105,6 +105,7 @@ impl<'tok, 'err, 'src> Parser<'tok, 'err, 'src> { self.advance(); let source = &self.source(span)[const { "'".len() }..]; + let source = 
source.strip_prefix("k#").unwrap_or(source); let ident = lex_ident(source, self.edition); if !validate(ident) { self.error(error(span)); diff --git a/src/lib/parser/test/misc.rs b/src/lib/parser/test/misc.rs index d44dce4..f5c2125 100644 --- a/src/lib/parser/test/misc.rs +++ b/src/lib/parser/test/misc.rs @@ -3,7 +3,7 @@ use crate::{ ast, edition::Edition::*, error::Error, - lexer::IdentKind, + lexer::{IdentKind, IdentMode}, token::{Token, TokenKind}, }; use deref as r; @@ -266,8 +266,8 @@ fn raw_idents() { Rust2015, "K!(r#self r#_);", Err(r!([ - Error::InvalidRawIdent(IdentKind::Normal, _), - Error::InvalidRawIdent(IdentKind::Normal, _) + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _), + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _), ])) ); @@ -352,12 +352,11 @@ fn raw_ticked_idents() { .. }) ); - t!( parse_item, Rust2021, "type R = &'r#_ ();", - Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)])) + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) ); // We once used to accept this by mistake! @@ -367,16 +366,26 @@ fn raw_ticked_idents() { Rust2021, "seg!('r#self 'r#Self);", Err(r!([ - Error::InvalidRawIdent(IdentKind::Ticked, _), - Error::InvalidRawIdent(IdentKind::Ticked, _) + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _), + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _) ])) ); // We once used to accept this by mistake! - t!(parse_item, Rust2021, "W!('r#0);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)]))); + t!( + parse_item, + Rust2021, + "W!('r#0);", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) + ); // We once used to accept this by mistake treating it as an empty raw ticked ident! 
- t!(parse_item, Rust2021, "O!('r#);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)]))); + t!( + parse_item, + Rust2021, + "O!('r#);", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) + ); // We once used to accept this by mistake treating it as a multi-scalar char lit! t!( @@ -384,9 +393,107 @@ fn raw_ticked_idents() { Rust2021, "W!('r#');", Err(r!([ - Error::InvalidRawIdent(IdentKind::Ticked, _), + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _), Error::UnterminatedCharLit(_), Error::MissingClosingDelimiters(_), ])) ); } + +#[test] +fn stropped_keywords() { + t!( + parse_item, + Rust2021, + "k#fn diverge() -> ! { k#loop {} }", + Ok(ast::Item { + kind: ast::ItemKind::Fn(r!(ast::FnItem { + binder: ast::Ident!("diverge"), + body: Some(ast::BlockExpr { + stmts: r!([ast::Stmt::Expr( + ast::Expr { kind: ast::ExprKind::Loop(..), .. }, + _ + )]) + }), + .. + })), + .. + }) + ); + + // Using a macro call to demonstrate that this is a lexical error even! + t!( + parse_item, + Rust2021, + "Q![ k# ];", + Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)])) + ); + t!( + parse_item, + Rust2021, + "Q![ k#common ];", + Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)])) + ); + + t!( + parse_expr, + Rust2018, + "k#loop {}", + Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)])) + ); +} + +#[test] +fn stropped_ticked_keywords() { + // FIXME: Both `'r#static` and `'k#static` are legal. Does that really make sense? + // I'm already confused why `'r#_` is illegal but `'r#static` is not. + t!( + parse_ty, + Rust2021, + "&'k#static ()", + Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("static"))), .. }))) + ); + + t!( + parse_ty, + Rust2021, + "&'k#_ ()", + Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("_"))), .. 
}))) + ); + + // These are lexically accepted despite + // * `'static` not being a valid label and + // * `'if` not being a valid lifetime or label. + // That's fine because non-stropped ticked "keywords" are also lexically allowed. + // Only syntactically they may be invalid. + t!(parse_item, Rust2021, "T!( 'k#static );", Ok(_)); + t!(parse_item, Rust2021, "T!( 'k#if );", Ok(_)); + + // Using a macro call to demonstrate that this is a lexical error even! + t!( + parse_item, + Rust2021, + "T!( 'k#common );", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)])) + ); + + // Using a macro call to demonstrate that this is a lexical error even! + t!( + parse_item, + Rust2021, + "U!( 'k# );", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)])) + ); + + t!( + parse_ty, + Rust2018, + "&'k#static ();", + Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)])) + ); + + // While `'k#static` is lexically valid, it's not syntactically valid as a label: + t!(parse_expr, Rust2021, "'k#static: loop {}", Err(r!([Error::ReservedLabel(_)]))); + + t!(parse_expr, Rust2021, "'k#_: loop {}", Err(r!([Error::ReservedLabel(_)]))); +}