From 38154e01320c9566d91d025647496a484e3c93eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Le=C3=B3n=20Orell=20Valerian=20Liehr?= Date: Wed, 11 Mar 2026 13:30:03 +0100 Subject: [PATCH 1/2] Lex stropped keywords (`k#keyword`) --- src/cli/diagnostics.rs | 3 ++ src/lib/error.rs | 1 + src/lib/lexer.rs | 68 +++++++++++++++++++++++++++---------- src/lib/parser/test/misc.rs | 33 ++++++++++++++++++ 4 files changed, 87 insertions(+), 18 deletions(-) diff --git a/src/cli/diagnostics.rs b/src/cli/diagnostics.rs index 1ac87bc..aee2f54 100644 --- a/src/cli/diagnostics.rs +++ b/src/cli/diagnostics.rs @@ -137,6 +137,9 @@ fn convert(error: Error, cx: &RenderCx<'_>) -> Diag { Error::ForbiddenOuterAttrs => Diag::new("outer attributes are forbidden in this context"), Error::InvalidNumericIdent(span) => Diag::new("invalid numeric identifier").highlight(span), Error::AbiStrSuffix(span) => Diag::new("suffix on ABI string").highlight(span), + Error::InvalidStroppedKeyword(span) => { + Diag::new("invalid stropped keyword").highlight(span) + } } } diff --git a/src/lib/error.rs b/src/lib/error.rs index b5c7cf8..c1c23b4 100644 --- a/src/lib/error.rs +++ b/src/lib/error.rs @@ -82,6 +82,7 @@ pub enum Error { InvalidRawIdent(IdentKind, Span), InvalidScalar(char, InvalidScalarPlace, Span), InvalidStrLitDelimiter(Span), + InvalidStroppedKeyword(Span), InvalidTyPrefix(Span), LifetimeObjectTyWithoutPlus(Span), MisplacedReceiver(Span), diff --git a/src/lib/lexer.rs b/src/lib/lexer.rs index 77d16d3..c4596fa 100644 --- a/src/lib/lexer.rs +++ b/src/lib/lexer.rs @@ -451,13 +451,14 @@ impl<'err, 'src> Lexer<'err, 'src> { } self.advance(); - return match self.fin_lex_raw_ident(IdentKind::Ticked, start) { - Some(()) => TokenKind::TickedIdent, - None => { - self.error(Error::InvalidRawIdent(IdentKind::Ticked, self.span(start))); - TokenKind::Error - } - }; + return self.fin_lex_prefixed_ident( + IdentKind::Ticked, + None, + |ident| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore), + 
Error::InvalidRawIdent,
+                |_| TokenKind::TickedIdent,
+                start,
+            );
        }
        _ => return TokenKind::TickedIdent,
    }
@@ -578,13 +579,30 @@ impl<'err, 'src> Lexer<'err, 'src> {
             self.advance();
             return self.fin_lex_raw_guarded_str_lit(start);
         }
+        // FEATURE: XXX
+        ("k", Some('#')) if self.edition >= Edition::Rust2021 => {
+            self.advance();
+
+            return self.fin_lex_prefixed_ident(
+                IdentKind::Normal,
+                None,
+                |ident| ident != TokenKind::CommonIdent,
+                |_, span| Error::InvalidStroppedKeyword(span),
+                std::convert::identity,
+                start,
+            );
+        }
         ("r", Some('#')) => {
             self.advance();
-            return match self.fin_lex_raw_ident(IdentKind::Normal, start) {
-                Some(()) => TokenKind::CommonIdent,
-                None => self.fin_lex_raw_guarded_str_lit(start),
-            };
+            return self.fin_lex_prefixed_ident(
+                IdentKind::Normal,
+                Some(|this, start| this.fin_lex_raw_guarded_str_lit(start)),
+                |ident| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore),
+                Error::InvalidRawIdent,
+                |_| TokenKind::CommonIdent,
+                start,
+            );
         }
         (_, Some(char @ ('"' | '\'' | '#'))) if self.edition >= Edition::Rust2021 => {
             self.error(Error::ReservedPrefix(self.span(start)));
@@ -599,22 +617,36 @@ impl<'err, 'src> Lexer<'err, 'src> {
         self.fin_lex_str_lit(raw, flavor, start)
     }

-    fn fin_lex_raw_ident(&mut self, kind: IdentKind, start: ByteIndex) -> Option<()> {
+    fn fin_lex_prefixed_ident(
+        &mut self,
+        kind: IdentKind,
+        fallback: Option<fn(&mut Self, ByteIndex) -> TokenKind>,
+        validate: fn(TokenKind) -> bool,
+        error: fn(IdentKind, Span) -> Error,
+        map: fn(TokenKind) -> TokenKind,
+        start: ByteIndex,
+    ) -> TokenKind {
         if !self.peek().is_some_and(is_ident_start) {
-            return None;
+            return match fallback {
+                Some(fallback) => fallback(self, start),
+                None => {
+                    self.error(error(kind, self.span(start)));
+                    TokenKind::Error
+                }
+            };
         }

         let unprefixed = self.index();
         self.advance();
         self.advance_while(is_ident_middle);

-        if let PathSegKeyword!() | TokenKind::Underscore =
-            lex_ident(self.source(unprefixed), self.edition)
-        {
-
self.error(Error::InvalidRawIdent(kind, self.span(start))); + let ident = lex_ident(self.source(unprefixed), self.edition); + + if !validate(ident) { + self.error(error(kind, self.span(start))); } - Some(()) + map(ident) } // FIXME: Consolidate with `fin_lex_str_lit` smh diff --git a/src/lib/parser/test/misc.rs b/src/lib/parser/test/misc.rs index d44dce4..15bef3d 100644 --- a/src/lib/parser/test/misc.rs +++ b/src/lib/parser/test/misc.rs @@ -390,3 +390,36 @@ fn raw_ticked_idents() { ])) ); } + +#[test] +fn stropped_keywords() { + t!( + parse_item, + Rust2021, + "k#fn diverge() -> ! { k#loop {} }", + Ok(ast::Item { + kind: ast::ItemKind::Fn(r!(ast::FnItem { + binder: ast::Ident!("diverge"), + body: Some(ast::BlockExpr { + stmts: r!([ast::Stmt::Expr( + ast::Expr { kind: ast::ExprKind::Loop(..), .. }, + _ + )]) + }), + .. + })), + .. + }) + ); + + // Using a macro call to demonstrate that this is a lexical error even! + t!(parse_item, Rust2021, "Q![ k# ];", Err(r!([Error::InvalidStroppedKeyword(_)]))); + t!(parse_item, Rust2021, "Q![ k#common ];", Err(r!([Error::InvalidStroppedKeyword(_)]))); + + t!( + parse_expr, + Rust2018, + "k#loop {}", + Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. 
}, _)])) + ); +} From 53971a141ac56021ce3ff7619e06fe543c72a0db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Le=C3=B3n=20Orell=20Valerian=20Liehr?= Date: Wed, 11 Mar 2026 02:34:58 +0100 Subject: [PATCH 2/2] Lex stropped ticked keywords (`'k#keyword'`) --- src/cli/diagnostics.rs | 15 +++--- src/lib/error.rs | 10 ++-- src/lib/lexer.rs | 47 ++++++++++++------ src/lib/parser.rs | 1 + src/lib/parser/test/misc.rs | 98 ++++++++++++++++++++++++++++++++----- 5 files changed, 134 insertions(+), 37 deletions(-) diff --git a/src/cli/diagnostics.rs b/src/cli/diagnostics.rs index aee2f54..9708bda 100644 --- a/src/cli/diagnostics.rs +++ b/src/cli/diagnostics.rs @@ -1,7 +1,7 @@ use annotate_snippets as ann; use rasur::{ error::{Error, InvalidScalarPlace}, - lexer::IdentKind, + lexer::{IdentKind, IdentMode}, parser::ExpectedFragment, span::Span, token::{Repr, Token, TokenKind}, @@ -81,12 +81,18 @@ fn convert(error: Error, cx: &RenderCx<'_>) -> Diag { Error::UnknownBuiltinSyntax(span) => Diag::new("unknown built-in syntax").highlight(span), Error::InvalidLetChain(span) => Diag::new("invalid let-chain").highlight(span), Error::ReuseInherentImpl => Diag::new("inherent impls cannot be reused"), - Error::InvalidRawIdent(IdentKind::Normal, span) => { + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, span) => { Diag::new("invalid raw identifier").highlight(span) } - Error::InvalidRawIdent(IdentKind::Ticked, span) => { + Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, span) => { + Diag::new("invalid stropped keyword").highlight(span) + } + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, span) => { Diag::new("invalid raw ticked identifier").highlight(span) } + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, span) => { + Diag::new("invalid stropped ticked keyword").highlight(span) + } Error::UnterminatedBlockComment(span) => { Diag::new("unterminated block comment").highlight(span) } @@ -137,9 +143,6 @@ fn convert(error: Error, cx: &RenderCx<'_>) -> Diag { 
Error::ForbiddenOuterAttrs => Diag::new("outer attributes are forbidden in this context"), Error::InvalidNumericIdent(span) => Diag::new("invalid numeric identifier").highlight(span), Error::AbiStrSuffix(span) => Diag::new("suffix on ABI string").highlight(span), - Error::InvalidStroppedKeyword(span) => { - Diag::new("invalid stropped keyword").highlight(span) - } } } diff --git a/src/lib/error.rs b/src/lib/error.rs index c1c23b4..2c13704 100644 --- a/src/lib/error.rs +++ b/src/lib/error.rs @@ -1,4 +1,9 @@ -use crate::{lexer::IdentKind, parser::ExpectedFragment, span::Span, token::Token}; +use crate::{ + lexer::{IdentKind, IdentMode}, + parser::ExpectedFragment, + span::Span, + token::Token, +}; use Default::default; use std::cell::RefCell; @@ -79,10 +84,9 @@ pub enum Error { InvalidNumericIdent(Span), InvalidOpAfterCast(Span), InvalidParenthesizedBound, - InvalidRawIdent(IdentKind, Span), InvalidScalar(char, InvalidScalarPlace, Span), InvalidStrLitDelimiter(Span), - InvalidStroppedKeyword(Span), + InvalidIdent(IdentKind, IdentMode, Span), InvalidTyPrefix(Span), LifetimeObjectTyWithoutPlus(Span), MisplacedReceiver(Span), diff --git a/src/lib/lexer.rs b/src/lib/lexer.rs index c4596fa..7988d39 100644 --- a/src/lib/lexer.rs +++ b/src/lib/lexer.rs @@ -444,18 +444,27 @@ impl<'err, 'src> Lexer<'err, 'src> { return TokenKind::CharLit; } Some('#') if self.edition >= Edition::Rust2021 => { - if self.source(unticked) != "r" { - self.error(Error::ReservedPrefix(self.span(unticked))); - self.advance(); // `#` - return TokenKind::Error; - } + let mode = match self.source(unticked) { + // FEATURE: XXX + "k" => IdentMode::Keyword, + "r" => IdentMode::Raw, + _ => { + self.error(Error::ReservedPrefix(self.span(unticked))); + self.advance(); // `#` + return TokenKind::Error; + } + }; self.advance(); return self.fin_lex_prefixed_ident( IdentKind::Ticked, + mode, None, - |ident| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore), - Error::InvalidRawIdent, + |ident, mode| 
match (mode, ident) {
+                    | (IdentMode::Keyword, TokenKind::CommonIdent)
+                    | (IdentMode::Raw, PathSegKeyword!() | TokenKind::Underscore) => false,
+                    _ => true,
+                },
                 |_| TokenKind::TickedIdent,
                 start,
             );
         }
@@ -585,9 +594,9 @@

             return self.fin_lex_prefixed_ident(
                 IdentKind::Normal,
+                IdentMode::Keyword,
                 None,
-                |ident| ident != TokenKind::CommonIdent,
-                |_, span| Error::InvalidStroppedKeyword(span),
+                |ident, _| ident != TokenKind::CommonIdent,
                 std::convert::identity,
                 start,
             );
@@ -597,9 +606,9 @@

             return self.fin_lex_prefixed_ident(
                 IdentKind::Normal,
+                IdentMode::Raw,
                 Some(|this, start| this.fin_lex_raw_guarded_str_lit(start)),
-                |ident| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore),
-                Error::InvalidRawIdent,
+                |ident, _| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore),
                 |_| TokenKind::CommonIdent,
                 start,
             );
@@ -620,9 +629,9 @@
     fn fin_lex_prefixed_ident(
         &mut self,
         kind: IdentKind,
+        mode: IdentMode,
         fallback: Option<fn(&mut Self, ByteIndex) -> TokenKind>,
-        validate: fn(TokenKind) -> bool,
-        error: fn(IdentKind, Span) -> Error,
+        validate: fn(TokenKind, IdentMode) -> bool,
         map: fn(TokenKind) -> TokenKind,
         start: ByteIndex,
     ) -> TokenKind {
@@ -630,7 +639,7 @@
             return match fallback {
                 Some(fallback) => fallback(self, start),
                 None => {
-                    self.error(error(kind, self.span(start)));
+                    self.error(Error::InvalidIdent(kind, mode, self.span(start)));
                     TokenKind::Error
                 }
             };
@@ -642,8 +651,8 @@

         let ident = lex_ident(self.source(unprefixed), self.edition);

-        if !validate(ident) {
-            self.error(error(kind, self.span(start)));
+        if !validate(ident, mode) {
+            self.error(Error::InvalidIdent(kind, mode, self.span(start)));
         }

         map(ident)
@@ -807,6 +816,12 @@ pub enum IdentKind {
     Ticked,
 }

+#[derive(Clone, Copy, Debug)]
+pub enum IdentMode {
+    Raw,
+    Keyword,
+}
+
 #[derive(Clone, Copy)]
 enum TextLitKind {
     Str,
diff --git a/src/lib/parser.rs b/src/lib/parser.rs index 5a9a9c6..fb2280c 100644 --- a/src/lib/parser.rs +++ b/src/lib/parser.rs @@ -105,6 +105,7 @@ impl<'tok, 'err, 'src> Parser<'tok, 'err, 'src> { self.advance(); let source = &self.source(span)[const { "'".len() }..]; + let source = source.strip_prefix("k#").unwrap_or(source); let ident = lex_ident(source, self.edition); if !validate(ident) { self.error(error(span)); diff --git a/src/lib/parser/test/misc.rs b/src/lib/parser/test/misc.rs index 15bef3d..f5c2125 100644 --- a/src/lib/parser/test/misc.rs +++ b/src/lib/parser/test/misc.rs @@ -3,7 +3,7 @@ use crate::{ ast, edition::Edition::*, error::Error, - lexer::IdentKind, + lexer::{IdentKind, IdentMode}, token::{Token, TokenKind}, }; use deref as r; @@ -266,8 +266,8 @@ fn raw_idents() { Rust2015, "K!(r#self r#_);", Err(r!([ - Error::InvalidRawIdent(IdentKind::Normal, _), - Error::InvalidRawIdent(IdentKind::Normal, _) + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _), + Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _), ])) ); @@ -352,12 +352,11 @@ fn raw_ticked_idents() { .. }) ); - t!( parse_item, Rust2021, "type R = &'r#_ ();", - Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)])) + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) ); // We once used to accept this by mistake! @@ -367,16 +366,26 @@ fn raw_ticked_idents() { Rust2021, "seg!('r#self 'r#Self);", Err(r!([ - Error::InvalidRawIdent(IdentKind::Ticked, _), - Error::InvalidRawIdent(IdentKind::Ticked, _) + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _), + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _) ])) ); // We once used to accept this by mistake! 
- t!(parse_item, Rust2021, "W!('r#0);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)]))); + t!( + parse_item, + Rust2021, + "W!('r#0);", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) + ); // We once used to accept this by mistake treating it as an empty raw ticked ident! - t!(parse_item, Rust2021, "O!('r#);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)]))); + t!( + parse_item, + Rust2021, + "O!('r#);", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)])) + ); // We once used to accept this by mistake treating it as a multi-scalar char lit! t!( @@ -384,7 +393,7 @@ fn raw_ticked_idents() { Rust2021, "W!('r#');", Err(r!([ - Error::InvalidRawIdent(IdentKind::Ticked, _), + Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _), Error::UnterminatedCharLit(_), Error::MissingClosingDelimiters(_), ])) @@ -413,8 +422,18 @@ fn stropped_keywords() { ); // Using a macro call to demonstrate that this is a lexical error even! - t!(parse_item, Rust2021, "Q![ k# ];", Err(r!([Error::InvalidStroppedKeyword(_)]))); - t!(parse_item, Rust2021, "Q![ k#common ];", Err(r!([Error::InvalidStroppedKeyword(_)]))); + t!( + parse_item, + Rust2021, + "Q![ k# ];", + Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)])) + ); + t!( + parse_item, + Rust2021, + "Q![ k#common ];", + Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)])) + ); t!( parse_expr, @@ -423,3 +442,58 @@ fn stropped_keywords() { Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)])) ); } + +#[test] +fn stropped_ticked_keywords() { + // FIXME: Both `'r#static` and `'k#static` are legal. Does that really make sense? + // I'm already confused why `'r#_` is illegal but `'r#static` is not. + t!( + parse_ty, + Rust2021, + "&'k#static ()", + Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("static"))), .. 
}))) + ); + + t!( + parse_ty, + Rust2021, + "&'k#_ ()", + Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("_"))), .. }))) + ); + + // These are lexically accepted despite + // * `'static` not being a valid label and + // * `'if` not being a valid lifetime or label. + // That's fine because non-stropped ticked "keywords" are also lexically allowed. + // Only syntactically they may be invalid. + t!(parse_item, Rust2021, "T!( 'k#static );", Ok(_)); + t!(parse_item, Rust2021, "T!( 'k#if );", Ok(_)); + + // Using a macro call to demonstrate that this is a lexical error even! + t!( + parse_item, + Rust2021, + "T!( 'k#common );", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)])) + ); + + // Using a macro call to demonstrate that this is a lexical error even! + t!( + parse_item, + Rust2021, + "U!( 'k# );", + Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)])) + ); + + t!( + parse_ty, + Rust2018, + "&'k#static ();", + Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)])) + ); + + // While `'k#static` is lexically valid, it's not syntactically valid as a label: + t!(parse_expr, Rust2021, "'k#static: loop {}", Err(r!([Error::ReservedLabel(_)]))); + + t!(parse_expr, Rust2021, "'k#_: loop {}", Err(r!([Error::ReservedLabel(_)]))); +}