Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions src/cli/diagnostics.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use annotate_snippets as ann;
use rasur::{
error::{Error, InvalidScalarPlace},
lexer::IdentKind,
lexer::{IdentKind, IdentMode},
parser::ExpectedFragment,
span::Span,
token::{Repr, Token, TokenKind},
Expand Down Expand Up @@ -81,12 +81,18 @@ fn convert(error: Error, cx: &RenderCx<'_>) -> Diag {
Error::UnknownBuiltinSyntax(span) => Diag::new("unknown built-in syntax").highlight(span),
Error::InvalidLetChain(span) => Diag::new("invalid let-chain").highlight(span),
Error::ReuseInherentImpl => Diag::new("inherent impls cannot be reused"),
Error::InvalidRawIdent(IdentKind::Normal, span) => {
Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, span) => {
Diag::new("invalid raw identifier").highlight(span)
}
Error::InvalidRawIdent(IdentKind::Ticked, span) => {
Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, span) => {
Diag::new("invalid stropped keyword").highlight(span)
}
Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, span) => {
Diag::new("invalid raw ticked identifier").highlight(span)
}
Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, span) => {
Diag::new("invalid stropped ticked keyword").highlight(span)
}
Error::UnterminatedBlockComment(span) => {
Diag::new("unterminated block comment").highlight(span)
}
Expand Down
9 changes: 7 additions & 2 deletions src/lib/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
use crate::{lexer::IdentKind, parser::ExpectedFragment, span::Span, token::Token};
use crate::{
lexer::{IdentKind, IdentMode},
parser::ExpectedFragment,
span::Span,
token::Token,
};
use Default::default;
use std::cell::RefCell;

Expand Down Expand Up @@ -79,9 +84,9 @@ pub enum Error {
InvalidNumericIdent(Span),
InvalidOpAfterCast(Span),
InvalidParenthesizedBound,
InvalidRawIdent(IdentKind, Span),
InvalidScalar(char, InvalidScalarPlace, Span),
InvalidStrLitDelimiter(Span),
InvalidIdent(IdentKind, IdentMode, Span),
InvalidTyPrefix(Span),
LifetimeObjectTyWithoutPlus(Span),
MisplacedReceiver(Span),
Expand Down
93 changes: 70 additions & 23 deletions src/lib/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -444,20 +444,30 @@ impl<'err, 'src> Lexer<'err, 'src> {
return TokenKind::CharLit;
}
Some('#') if self.edition >= Edition::Rust2021 => {
if self.source(unticked) != "r" {
self.error(Error::ReservedPrefix(self.span(unticked)));
self.advance(); // `#`
return TokenKind::Error;
}
self.advance();

return match self.fin_lex_raw_ident(IdentKind::Ticked, start) {
Some(()) => TokenKind::TickedIdent,
None => {
self.error(Error::InvalidRawIdent(IdentKind::Ticked, self.span(start)));
TokenKind::Error
let mode = match self.source(unticked) {
// FEATURE: XXX <https://github.com/rust-lang/rust/issues/0>
"k" => IdentMode::Keyword,
"r" => IdentMode::Raw,
_ => {
self.error(Error::ReservedPrefix(self.span(unticked)));
self.advance(); // `#`
return TokenKind::Error;
}
};
self.advance();

return self.fin_lex_prefixed_ident(
IdentKind::Ticked,
mode,
None,
|ident, mode| match (mode, ident) {
| (IdentMode::Keyword, TokenKind::CommonIdent)
| (IdentMode::Raw, PathSegKeyword!() | TokenKind::Underscore) => false,
_ => true,
},
|_| TokenKind::TickedIdent,
start,
);
}
_ => return TokenKind::TickedIdent,
}
Expand Down Expand Up @@ -578,13 +588,30 @@ impl<'err, 'src> Lexer<'err, 'src> {
self.advance();
return self.fin_lex_raw_guarded_str_lit(start);
}
// FEATURE: XXX <https://github.com/rust-lang/rust/issues/0>
("k", Some('#')) if self.edition >= Edition::Rust2021 => {
self.advance();

return self.fin_lex_prefixed_ident(
IdentKind::Normal,
IdentMode::Keyword,
None,
|ident, _| ident != TokenKind::CommonIdent,
std::convert::identity,
start,
);
}
("r", Some('#')) => {
self.advance();

return match self.fin_lex_raw_ident(IdentKind::Normal, start) {
Some(()) => TokenKind::CommonIdent,
None => self.fin_lex_raw_guarded_str_lit(start),
};
return self.fin_lex_prefixed_ident(
IdentKind::Normal,
IdentMode::Raw,
Some(|this, start| this.fin_lex_raw_guarded_str_lit(start)),
|ident, _| !matches!(ident, PathSegKeyword!() | TokenKind::Underscore),
|_| TokenKind::CommonIdent,
start,
);
}
(_, Some(char @ ('"' | '\'' | '#'))) if self.edition >= Edition::Rust2021 => {
self.error(Error::ReservedPrefix(self.span(start)));
Expand All @@ -599,22 +626,36 @@ impl<'err, 'src> Lexer<'err, 'src> {
self.fin_lex_str_lit(raw, flavor, start)
}

fn fin_lex_raw_ident(&mut self, kind: IdentKind, start: ByteIndex) -> Option<()> {
fn fin_lex_prefixed_ident(
&mut self,
kind: IdentKind,
mode: IdentMode,
fallback: Option<fn(&mut Self, ByteIndex) -> TokenKind>,
validate: fn(TokenKind, IdentMode) -> bool,
map: fn(TokenKind) -> TokenKind,
start: ByteIndex,
) -> TokenKind {
if !self.peek().is_some_and(is_ident_start) {
return None;
return match fallback {
Some(fallback) => fallback(self, start),
None => {
self.error(Error::InvalidIdent(kind, mode, self.span(start)));
TokenKind::Error
}
};
}

let unprefixed = self.index();
self.advance();
self.advance_while(is_ident_middle);

if let PathSegKeyword!() | TokenKind::Underscore =
lex_ident(self.source(unprefixed), self.edition)
{
self.error(Error::InvalidRawIdent(kind, self.span(start)));
let ident = lex_ident(self.source(unprefixed), self.edition);

if !validate(ident, mode) {
self.error(Error::InvalidIdent(kind, mode, self.span(start)));
}

Some(())
map(ident)
}

// FIXME: Consolidate with `fin_lex_str_lit` smh
Expand Down Expand Up @@ -775,6 +816,12 @@ pub enum IdentKind {
Ticked,
}

/// The prefix that introduced a prefixed identifier.
///
/// Carried alongside [`IdentKind`] in `Error::InvalidIdent` so diagnostics can
/// tell an invalid raw identifier apart from an invalid stropped keyword.
#[derive(Clone, Copy, Debug)]
pub enum IdentMode {
/// Introduced by the `r#` prefix (raw identifier).
Raw,
/// Introduced by the `k#` prefix (stropped keyword).
Keyword,
}

#[derive(Clone, Copy)]
enum TextLitKind {
Str,
Expand Down
1 change: 1 addition & 0 deletions src/lib/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ impl<'tok, 'err, 'src> Parser<'tok, 'err, 'src> {
self.advance();

let source = &self.source(span)[const { "'".len() }..];
let source = source.strip_prefix("k#").unwrap_or(source);
let ident = lex_ident(source, self.edition);
if !validate(ident) {
self.error(error(span));
Expand Down
127 changes: 117 additions & 10 deletions src/lib/parser/test/misc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{
ast,
edition::Edition::*,
error::Error,
lexer::IdentKind,
lexer::{IdentKind, IdentMode},
token::{Token, TokenKind},
};
use deref as r;
Expand Down Expand Up @@ -266,8 +266,8 @@ fn raw_idents() {
Rust2015,
"K!(r#self r#_);",
Err(r!([
Error::InvalidRawIdent(IdentKind::Normal, _),
Error::InvalidRawIdent(IdentKind::Normal, _)
Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _),
Error::InvalidIdent(IdentKind::Normal, IdentMode::Raw, _),
]))
);

Expand Down Expand Up @@ -352,12 +352,11 @@ fn raw_ticked_idents() {
..
})
);

t!(
parse_item,
Rust2021,
"type R = &'r#_ ();",
Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)]))
Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)]))
);

// We once used to accept this by mistake!
Expand All @@ -367,26 +366,134 @@ fn raw_ticked_idents() {
Rust2021,
"seg!('r#self 'r#Self);",
Err(r!([
Error::InvalidRawIdent(IdentKind::Ticked, _),
Error::InvalidRawIdent(IdentKind::Ticked, _)
Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _),
Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)
]))
);

// We once used to accept this by mistake!
t!(parse_item, Rust2021, "W!('r#0);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)])));
t!(
parse_item,
Rust2021,
"W!('r#0);",
Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)]))
);

// We once used to accept this by mistake treating it as an empty raw ticked ident!
t!(parse_item, Rust2021, "O!('r#);", Err(r!([Error::InvalidRawIdent(IdentKind::Ticked, _)])));
t!(
parse_item,
Rust2021,
"O!('r#);",
Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _)]))
);

// We once used to accept this by mistake treating it as a multi-scalar char lit!
t!(
parse_item,
Rust2021,
"W!('r#');",
Err(r!([
Error::InvalidRawIdent(IdentKind::Ticked, _),
Error::InvalidIdent(IdentKind::Ticked, IdentMode::Raw, _),
Error::UnterminatedCharLit(_),
Error::MissingClosingDelimiters(_),
]))
);
}

/// Exercises `k#`-stropped keywords in non-ticked position.
#[test]
fn stropped_keywords() {
// In Rust 2021, `k#fn` and `k#loop` are expected to be accepted where the plain
// keywords `fn` and `loop` would be: the item parses as a function containing a loop.
t!(
parse_item,
Rust2021,
"k#fn diverge() -> ! { k#loop {} }",
Ok(ast::Item {
kind: ast::ItemKind::Fn(r!(ast::FnItem {
binder: ast::Ident!("diverge"),
body: Some(ast::BlockExpr {
stmts: r!([ast::Stmt::Expr(
ast::Expr { kind: ast::ExprKind::Loop(..), .. },
_
)])
}),
..
})),
..
})
);

// A macro call is used to demonstrate that this is already a lexical error
// (not merely a syntactic one): `k#` must be followed by an identifier.
t!(
parse_item,
Rust2021,
"Q![ k# ];",
Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)]))
);
// A stropped word that isn't a keyword is expected to be rejected as well.
t!(
parse_item,
Rust2021,
"Q![ k#common ];",
Err(r!([Error::InvalidIdent(IdentKind::Normal, IdentMode::Keyword, _)]))
);

// Before Rust 2021, `k#` is not expected to be treated as a prefix:
// the `#` surfaces as an unexpected token instead.
t!(
parse_expr,
Rust2018,
"k#loop {}",
Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)]))
);
}

/// Exercises `k#`-stropped keywords in ticked position (`'k#…`), i.e. where
/// lifetimes and labels are written.
#[test]
fn stropped_ticked_keywords() {
// FIXME: Both `'r#static` and `'k#static` are legal. Does that really make sense?
// I'm already confused why `'r#_` is illegal but `'r#static` is not.
t!(
parse_ty,
Rust2021,
"&'k#static ()",
Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("static"))), .. })))
);

// Unlike `'r#_`, `'k#_` is expected to be accepted and to yield the lifetime `_`.
t!(
parse_ty,
Rust2021,
"&'k#_ ()",
Ok(ast::Ty::Ref(r!(ast::RefTy { lt: Some(ast::Lifetime(ast::Ident!("_"))), .. })))
);

// These are lexically accepted despite
// * `'static` not being a valid label and
// * `'if` not being a valid lifetime or label.
// That's fine because non-stropped ticked "keywords" are also lexically allowed.
// They may only be invalid syntactically.
t!(parse_item, Rust2021, "T!( 'k#static );", Ok(_));
t!(parse_item, Rust2021, "T!( 'k#if );", Ok(_));

// A macro call is used to demonstrate that this is already a lexical error:
// a stropped word that isn't a keyword is rejected.
t!(
parse_item,
Rust2021,
"T!( 'k#common );",
Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)]))
);

// A macro call is used to demonstrate that this is already a lexical error:
// `'k#` must be followed by an identifier.
t!(
parse_item,
Rust2021,
"U!( 'k# );",
Err(r!([Error::InvalidIdent(IdentKind::Ticked, IdentMode::Keyword, _)]))
);

// Before Rust 2021, `'k#` is not expected to be treated as a prefix:
// the `#` surfaces as an unexpected token instead.
t!(
parse_ty,
Rust2018,
"&'k#static ();",
Err(r!([Error::UnexpectedToken(Token { kind: TokenKind::Hash, .. }, _)]))
);

// While `'k#static` is lexically valid, it's not syntactically valid as a label:
t!(parse_expr, Rust2021, "'k#static: loop {}", Err(r!([Error::ReservedLabel(_)])));

// The same is expected for `'k#_`.
t!(parse_expr, Rust2021, "'k#_: loop {}", Err(r!([Error::ReservedLabel(_)])));
}