From c461182521473cb7c786862b0ad6fbbce8a17baf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Sun, 15 Mar 2026 01:23:27 +0000 Subject: [PATCH 1/6] Lex lifetimes using emoji and emit appropriate error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lex and parse emoji in lifetimes, and disallow them in the parser with a hard error. Allow emoji to start a lifetime name even if they are not XID_Start. ``` error: lifetimes cannot contain emoji --> $DIR/emoji-in-lifetime.rs:1:22 | LL | fn bad_lifetime_name<'🐛🐛🐛family👨👩👧👦>( | ^^^^^^^^^^^^^^^^^^^^^ ``` --- compiler/rustc_lexer/src/lib.rs | 19 ++++++++++++++++--- compiler/rustc_parse/src/lexer/mod.rs | 7 +++++-- tests/ui/lexer/emoji-in-lifetime.rs | 9 +++++++++ tests/ui/lexer/emoji-in-lifetime.stderr | 20 ++++++++++++++++++++ 4 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 tests/ui/lexer/emoji-in-lifetime.rs create mode 100644 tests/ui/lexer/emoji-in-lifetime.stderr diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index dc6e3b1f358dc..91cf3237bda8f 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -141,6 +141,7 @@ pub enum TokenKind { /// A lifetime, e.g. `'a`. Lifetime { starts_with_number: bool, + has_emoji: bool, }, /// `;` @@ -975,6 +976,7 @@ impl<'a> Cursor<'a> { fn lifetime_or_char(&mut self) -> TokenKind { debug_assert!(self.prev() == '\''); + let mut has_emoji = false; let can_be_a_lifetime = if self.second() == '\'' { // It's surely not a lifetime. false @@ -982,7 +984,12 @@ impl<'a> Cursor<'a> { // If the first symbol is valid for identifier, it can be a lifetime. // Also check if it's a number for a better error reporting (so '0 will // be reported as invalid lifetime and not as unterminated char literal). 
- is_id_start(self.first()) || self.first().is_ascii_digit() + let c = self.first(); + let is_emoji = !c.is_ascii() && c.is_emoji_char(); + if is_emoji { + has_emoji = true; + } + is_id_start(c) || c.is_ascii_digit() || is_emoji }; if !can_be_a_lifetime { @@ -1012,7 +1019,13 @@ impl<'a> Cursor<'a> { // First symbol can be a number (which isn't a valid identifier start), // so skip it without any checks. self.bump(); - self.eat_while(is_id_continue); + self.eat_while(|c| { + let is_emoji = !c.is_ascii() && c.is_emoji_char(); + if is_emoji { + has_emoji = true; + } + is_id_continue(c) || is_emoji + }); match self.first() { // Check if after skipping literal contents we've met a closing @@ -1024,7 +1037,7 @@ impl<'a> Cursor<'a> { Literal { kind, suffix_start: self.pos_within_token() } } '#' if !starts_with_number => UnknownPrefixLifetime, - _ => Lifetime { starts_with_number }, + _ => Lifetime { starts_with_number, has_emoji }, } } diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index cd90655125b2b..bfbc203754fc4 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -316,19 +316,22 @@ impl<'psess, 'src> Lexer<'psess, 'src> { self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal"); token::Literal(token::Lit { kind, symbol, suffix }) } - rustc_lexer::TokenKind::Lifetime { starts_with_number } => { + rustc_lexer::TokenKind::Lifetime { starts_with_number, has_emoji } => { // Include the leading `'` in the real identifier, for macro // expansion purposes. See #12512 for the gory details of why // this is necessary. 
let lifetime_name = nfc_normalize(self.str_from(start)); self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); + let span = self.mk_sp(start, self.pos); if starts_with_number { - let span = self.mk_sp(start, self.pos); self.dcx() .struct_err("lifetimes cannot start with a number") .with_span(span) .stash(span, StashKey::LifetimeIsChar); } + if has_emoji { + self.dcx().struct_span_err(span, "lifetimes cannot contain emoji").emit(); + } token::Lifetime(lifetime_name, IdentIsRaw::No) } rustc_lexer::TokenKind::RawLifetime => { diff --git a/tests/ui/lexer/emoji-in-lifetime.rs b/tests/ui/lexer/emoji-in-lifetime.rs new file mode 100644 index 0000000000000..bbd0e27821a77 --- /dev/null +++ b/tests/ui/lexer/emoji-in-lifetime.rs @@ -0,0 +1,9 @@ +// #141081 +fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ()) {} +//~^ ERROR: lifetimes cannot contain emoji +//~| ERROR: lifetimes cannot contain emoji +fn main() { + '๐Ÿ›: { //~ ERROR: lifetimes cannot contain emoji + todo!(); + }; +} diff --git a/tests/ui/lexer/emoji-in-lifetime.stderr b/tests/ui/lexer/emoji-in-lifetime.stderr new file mode 100644 index 0000000000000..03f1f997b0d9e --- /dev/null +++ b/tests/ui/lexer/emoji-in-lifetime.stderr @@ -0,0 +1,20 @@ +error: lifetimes cannot contain emoji + --> $DIR/emoji-in-lifetime.rs:2:22 + | +LL | fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ ()) {} + | ^^^^^^^^^^^^^^^^^^^^^ + +error: lifetimes cannot contain emoji + --> $DIR/emoji-in-lifetime.rs:2:45 + | +LL | fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ ()) {} + | ^^^^^^^^^^^^^^^^^^^^^ + +error: lifetimes cannot contain emoji + --> $DIR/emoji-in-lifetime.rs:6:5 + | +LL | '๐Ÿ›: { + | ^^^ + +error: aborting due to 3 previous errors + From dfdc525ff244c0fa2c5f0e2181269ef76b90fd2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Wed, 
18 Mar 2026 18:09:47 +0000 Subject: [PATCH 2/6] Unify lifetime and identifier parsing --- Cargo.lock | 1 + compiler/rustc_lexer/src/lib.rs | 46 ++++++-------- compiler/rustc_parse/Cargo.toml | 1 + compiler/rustc_parse/src/lexer/mod.rs | 60 ++++++++++++++++--- tests/ui/lexer/emoji-in-lifetime.rs | 19 ++++-- tests/ui/lexer/emoji-in-lifetime.stderr | 40 +++++++++---- .../lex-bad-str-literal-as-char-1.stderr | 2 +- tests/ui/parser/numeric-lifetime.stderr | 4 +- 8 files changed, 119 insertions(+), 54 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 98567f858e9f1..2763d12fc9c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4450,6 +4450,7 @@ dependencies = [ "thin-vec", "tracing", "unicode-normalization", + "unicode-properties", "unicode-width 0.2.2", ] diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 91cf3237bda8f..fea8980103cce 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -140,8 +140,7 @@ pub enum TokenKind { /// A lifetime, e.g. `'a`. Lifetime { - starts_with_number: bool, - has_emoji: bool, + invalid: bool, }, /// `;` @@ -585,7 +584,7 @@ impl<'a> Cursor<'a> { let kind = RawStr { n_hashes: res.ok() }; Literal { kind, suffix_start } } - _ => self.ident_or_unknown_prefix(), + _ => self.ident_or_unknown_prefix(false), }, // Byte literal, byte string literal, raw byte string literal or identifier. @@ -604,7 +603,7 @@ impl<'a> Cursor<'a> { // Identifier (this should be checked after other variant that can // start as identifier). - c if is_id_start(c) => self.ident_or_unknown_prefix(), + c if is_id_start(c) => self.ident_or_unknown_prefix(false), // Numeric literal. c @ '0'..='9' => { @@ -662,7 +661,7 @@ impl<'a> Cursor<'a> { Literal { kind, suffix_start } } // Identifier starting with an emoji. Only lexed for graceful error recovery. 
- c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(), + c if is_emoji(c) => self.invalid_ident(), _ => Unknown, }; if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes) @@ -833,25 +832,22 @@ impl<'a> Cursor<'a> { RawIdent } - fn ident_or_unknown_prefix(&mut self) -> TokenKind { - debug_assert!(is_id_start(self.prev())); + fn ident_or_unknown_prefix(&mut self, already_invalid: bool) -> TokenKind { + debug_assert!(is_id_start(self.prev()) || already_invalid); // Start is already eaten, eat the rest of identifier. self.eat_while(is_id_continue); // Known prefixes must have been handled earlier. So if // we see a prefix here, it is definitely an unknown prefix. match self.first() { '#' | '"' | '\'' => UnknownPrefix, - c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(), + c if is_emoji(c) => self.invalid_ident(), _ => Ident, } } fn invalid_ident(&mut self) -> TokenKind { // Start is already eaten, eat the rest of identifier. - self.eat_while(|c| { - const ZERO_WIDTH_JOINER: char = '\u{200d}'; - is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER - }); + self.eat_while(|c| is_id_continue(c) || is_emoji(c)); // An invalid identifier followed by '#' or '"' or '\'' could be // interpreted as an invalid literal prefix. We don't bother doing that // because the treatment of invalid identifiers and invalid prefixes @@ -896,7 +892,7 @@ impl<'a> Cursor<'a> { let kind = mk_kind_raw(res.ok()); Literal { kind, suffix_start } } - _ => self.ident_or_unknown_prefix(), + _ => self.ident_or_unknown_prefix(false), } } @@ -976,7 +972,7 @@ impl<'a> Cursor<'a> { fn lifetime_or_char(&mut self) -> TokenKind { debug_assert!(self.prev() == '\''); - let mut has_emoji = false; + let mut invalid = false; let can_be_a_lifetime = if self.second() == '\'' { // It's surely not a lifetime. 
false @@ -985,11 +981,9 @@ impl<'a> Cursor<'a> { // Also check if it's a number for a better error reporting (so '0 will // be reported as invalid lifetime and not as unterminated char literal). let c = self.first(); - let is_emoji = !c.is_ascii() && c.is_emoji_char(); - if is_emoji { - has_emoji = true; - } - is_id_start(c) || c.is_ascii_digit() || is_emoji + invalid |= c.is_ascii_digit(); + invalid |= is_emoji(c); + is_id_start(c) || invalid }; if !can_be_a_lifetime { @@ -1019,13 +1013,7 @@ impl<'a> Cursor<'a> { // First symbol can be a number (which isn't a valid identifier start), // so skip it without any checks. self.bump(); - self.eat_while(|c| { - let is_emoji = !c.is_ascii() && c.is_emoji_char(); - if is_emoji { - has_emoji = true; - } - is_id_continue(c) || is_emoji - }); + invalid |= matches!(self.ident_or_unknown_prefix(invalid), InvalidIdent); match self.first() { // Check if after skipping literal contents we've met a closing @@ -1037,7 +1025,7 @@ impl<'a> Cursor<'a> { Literal { kind, suffix_start: self.pos_within_token() } } '#' if !starts_with_number => UnknownPrefixLifetime, - _ => Lifetime { starts_with_number, has_emoji }, + _ => Lifetime { invalid }, } } @@ -1290,3 +1278,7 @@ impl<'a> Cursor<'a> { self.eat_while(is_id_continue); } } + +fn is_emoji(c: char) -> bool { + !c.is_ascii() && c.is_emoji_char() +} diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml index 28a67ae12126b..2c9c4f54d1034 100644 --- a/compiler/rustc_parse/Cargo.toml +++ b/compiler/rustc_parse/Cargo.toml @@ -20,6 +20,7 @@ rustc_span = { path = "../rustc_span" } thin-vec = "0.2.12" tracing = "0.1" unicode-normalization = "0.1.25" +unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] } unicode-width = "0.2.2" # tidy-alphabetical-end diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index bfbc203754fc4..4ecaea2aff437 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ 
b/compiler/rustc_parse/src/lexer/mod.rs @@ -17,6 +17,7 @@ use rustc_session::lint::builtin::{ use rustc_session::parse::ParseSess; use rustc_span::{BytePos, Pos, Span, Symbol, sym}; use tracing::debug; +use unicode_properties::emoji::UnicodeEmoji; use crate::errors; use crate::lexer::diagnostics::TokenTreeDiagInfo; @@ -316,21 +317,62 @@ impl<'psess, 'src> Lexer<'psess, 'src> { self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal"); token::Literal(token::Lit { kind, symbol, suffix }) } - rustc_lexer::TokenKind::Lifetime { starts_with_number, has_emoji } => { + rustc_lexer::TokenKind::Lifetime { invalid } => { // Include the leading `'` in the real identifier, for macro // expansion purposes. See #12512 for the gory details of why // this is necessary. let lifetime_name = nfc_normalize(self.str_from(start)); self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1))); let span = self.mk_sp(start, self.pos); - if starts_with_number { - self.dcx() - .struct_err("lifetimes cannot start with a number") - .with_span(span) - .stash(span, StashKey::LifetimeIsChar); - } - if has_emoji { - self.dcx().struct_span_err(span, "lifetimes cannot contain emoji").emit(); + if invalid { + let name = lifetime_name.as_str(); + // skip(1) to skip the `'` + let starts_with_number = matches!( + name.chars().skip(1).next(), + Some(c) if c.is_ascii_digit() + ); + let mut emoji = vec![]; + for (i, c) in name.char_indices().skip(1) { + let i = i as u32; + if !c.is_ascii() && c.is_emoji_char() { + let lo = start + BytePos(i); + emoji.push(self.mk_sp(lo, lo + Pos::from_usize(c.len_utf8()))); + } + } + let err = match (starts_with_number, &emoji[..]) { + (false, []) => { + unreachable!("lifetime {name:?} incorrectly marked as invalid?"); + } + (true, []) if name.len() > 2 => { + // Point at the first lifetime name character. 
+ let start_span = self.mk_sp(start + BytePos(1), start + BytePos(2)); + self.dcx() + .struct_err(format!( + "lifetimes cannot start with a number: `{name}`" + )) + .with_span(start_span) + .with_span_label(span, "") + } + (true, []) => { + // Point at the whole lifetime name. + self.dcx() + .struct_err(format!( + "lifetimes cannot start with a number: `{name}`" + )) + .with_span(span) + } + (false, [_, ..]) => self.dcx() + .struct_err(format!("lifetimes cannot have emoji: `{name}`")) + .with_span(emoji.clone()) + .with_span_label(span, ""), + (true, [_, ..]) => self.dcx() + .struct_err(format!( + "invalid lifetime name: `{}`", + name.escape_default(), + )) + .with_span(span), + }; + err.stash(span, StashKey::LifetimeIsChar); } token::Lifetime(lifetime_name, IdentIsRaw::No) } diff --git a/tests/ui/lexer/emoji-in-lifetime.rs b/tests/ui/lexer/emoji-in-lifetime.rs index bbd0e27821a77..8994e592f8b46 100644 --- a/tests/ui/lexer/emoji-in-lifetime.rs +++ b/tests/ui/lexer/emoji-in-lifetime.rs @@ -1,9 +1,20 @@ // #141081 -fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ ()) {} -//~^ ERROR: lifetimes cannot contain emoji -//~| ERROR: lifetimes cannot contain emoji +fn bad_lifetime_name< + '๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ,//~ ERROR: lifetimes cannot have emoji + '12, //~ ERROR: lifetimes cannot start with a number + 'a๐Ÿ›, //~ ERROR: lifetimes cannot have emoji + '1๐Ÿ›, //~ ERROR: invalid lifetime name + '1, //~ ERROR: lifetimes cannot start with a number + 'aโ€Œb // bare zero-width-joiners are accepted as XID_Continue +>() {} + + + + + + fn main() { - '๐Ÿ›: { //~ ERROR: lifetimes cannot contain emoji + '๐Ÿ›: { //~ ERROR: lifetimes cannot have emoji todo!(); }; } diff --git a/tests/ui/lexer/emoji-in-lifetime.stderr b/tests/ui/lexer/emoji-in-lifetime.stderr index 03f1f997b0d9e..4f5743fbed5ad 100644 --- a/tests/ui/lexer/emoji-in-lifetime.stderr +++ b/tests/ui/lexer/emoji-in-lifetime.stderr @@ 
-1,20 +1,38 @@ -error: lifetimes cannot contain emoji - --> $DIR/emoji-in-lifetime.rs:2:22 +error: lifetimes cannot have emoji: `'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ` + --> $DIR/emoji-in-lifetime.rs:3:6 | -LL | fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ ()) {} - | ^^^^^^^^^^^^^^^^^^^^^ +LL | '๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ, + | -^^^^^^------^^^^^^^^ -error: lifetimes cannot contain emoji - --> $DIR/emoji-in-lifetime.rs:2:45 +error: lifetimes cannot start with a number: `'12` + --> $DIR/emoji-in-lifetime.rs:4:6 | -LL | fn bad_lifetime_name<'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ>(_: &'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ ()) {} - | ^^^^^^^^^^^^^^^^^^^^^ +LL | '12, + | -^- -error: lifetimes cannot contain emoji +error: lifetimes cannot have emoji: `'a๐Ÿ›` + --> $DIR/emoji-in-lifetime.rs:5:7 + | +LL | 'a๐Ÿ›, + | --^^ + +error: invalid lifetime name: `\'1\u{1f41b}` --> $DIR/emoji-in-lifetime.rs:6:5 | +LL | '1๐Ÿ›, + | ^^^^ + +error: lifetimes cannot start with a number: `'1` + --> $DIR/emoji-in-lifetime.rs:7:5 + | +LL | '1, + | ^^ + +error: lifetimes cannot have emoji: `'๐Ÿ›` + --> $DIR/emoji-in-lifetime.rs:17:6 + | LL | '๐Ÿ›: { - | ^^^ + | -^^ -error: aborting due to 3 previous errors +error: aborting due to 6 previous errors diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr index 81ee697802b9c..7503774cd686f 100644 --- a/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr +++ b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr @@ -10,7 +10,7 @@ LL - println!('1 + 1'); LL + println!("1 + 1"); | -error: lifetimes cannot start with a number +error: lifetimes cannot start with a number: `'1` --> $DIR/lex-bad-str-literal-as-char-1.rs:3:14 | LL | println!('1 + 1'); diff --git a/tests/ui/parser/numeric-lifetime.stderr b/tests/ui/parser/numeric-lifetime.stderr index 7c1bcb7263171..46b06ae7dc3cb 100644 --- a/tests/ui/parser/numeric-lifetime.stderr +++ 
b/tests/ui/parser/numeric-lifetime.stderr @@ -6,13 +6,13 @@ LL | let x: usize = ""; | | | expected due to this -error: lifetimes cannot start with a number +error: lifetimes cannot start with a number: `'1` --> $DIR/numeric-lifetime.rs:1:10 | LL | struct S<'1> { s: &'1 usize } | ^^ -error: lifetimes cannot start with a number +error: lifetimes cannot start with a number: `'1` --> $DIR/numeric-lifetime.rs:1:20 | LL | struct S<'1> { s: &'1 usize } From 702197abd2ba344efce263a9b0e7e139032a8d99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Wed, 18 Mar 2026 18:32:30 +0000 Subject: [PATCH 3/6] Use `bad_unicode_identifiers` for lifetimes --- compiler/rustc_parse/src/lexer/mod.rs | 56 ++++++++----------------- tests/ui/lexer/emoji-in-lifetime.rs | 9 ++-- tests/ui/lexer/emoji-in-lifetime.stderr | 41 +++++++++--------- 3 files changed, 45 insertions(+), 61 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 4ecaea2aff437..7d26ea9454f4f 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -331,48 +331,28 @@ impl<'psess, 'src> Lexer<'psess, 'src> { name.chars().skip(1).next(), Some(c) if c.is_ascii_digit() ); - let mut emoji = vec![]; - for (i, c) in name.char_indices().skip(1) { - let i = i as u32; - if !c.is_ascii() && c.is_emoji_char() { - let lo = start + BytePos(i); - emoji.push(self.mk_sp(lo, lo + Pos::from_usize(c.len_utf8()))); - } + if name.chars().any(|c| !c.is_ascii() && c.is_emoji_char()) { + self.psess + .bad_unicode_identifiers + .borrow_mut() + .entry(lifetime_name) + .or_default() + .push(span); } - let err = match (starts_with_number, &emoji[..]) { - (false, []) => { - unreachable!("lifetime {name:?} incorrectly marked as invalid?"); - } - (true, []) if name.len() > 2 => { + if starts_with_number { + let mut err = self.dcx() + .struct_err(format!( + "lifetimes cannot start with a number: `{name}`" + )) + .with_span(span); + if 
name.len() > 2 { // Point at the first lifetime name character. let start_span = self.mk_sp(start + BytePos(1), start + BytePos(2)); - self.dcx() - .struct_err(format!( - "lifetimes cannot start with a number: `{name}`" - )) - .with_span(start_span) - .with_span_label(span, "") + err.span(start_span); + err.span_label(span, ""); } - (true, []) => { - // Point at the whole lifetime name. - self.dcx() - .struct_err(format!( - "lifetimes cannot start with a number: `{name}`" - )) - .with_span(span) - } - (false, [_, ..]) => self.dcx() - .struct_err(format!("lifetimes cannot have emoji: `{name}`")) - .with_span(emoji.clone()) - .with_span_label(span, ""), - (true, [_, ..]) => self.dcx() - .struct_err(format!( - "invalid lifetime name: `{}`", - name.escape_default(), - )) - .with_span(span), - }; - err.stash(span, StashKey::LifetimeIsChar); + err.stash(span, StashKey::LifetimeIsChar); + } } token::Lifetime(lifetime_name, IdentIsRaw::No) } diff --git a/tests/ui/lexer/emoji-in-lifetime.rs b/tests/ui/lexer/emoji-in-lifetime.rs index 8994e592f8b46..3442a9380abe9 100644 --- a/tests/ui/lexer/emoji-in-lifetime.rs +++ b/tests/ui/lexer/emoji-in-lifetime.rs @@ -1,9 +1,10 @@ // #141081 fn bad_lifetime_name< - '๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ,//~ ERROR: lifetimes cannot have emoji + '๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ,//~ ERROR: identifiers cannot contain emoji '12, //~ ERROR: lifetimes cannot start with a number - 'a๐Ÿ›, //~ ERROR: lifetimes cannot have emoji - '1๐Ÿ›, //~ ERROR: invalid lifetime name + 'a๐Ÿ›, //~ ERROR: identifiers cannot contain emoji + '1๐Ÿ›, //~ ERROR: identifiers cannot contain emoji + //~^ ERROR: lifetimes cannot start with a number '1, //~ ERROR: lifetimes cannot start with a number 'aโ€Œb // bare zero-width-joiners are accepted as XID_Continue >() {} @@ -14,7 +15,7 @@ fn bad_lifetime_name< fn main() { - '๐Ÿ›: { //~ ERROR: lifetimes cannot have emoji + 'a๐Ÿ›: { // pointed at on the error from line 5 todo!(); }; } diff --git 
a/tests/ui/lexer/emoji-in-lifetime.stderr b/tests/ui/lexer/emoji-in-lifetime.stderr index 4f5743fbed5ad..7e205639a9a3c 100644 --- a/tests/ui/lexer/emoji-in-lifetime.stderr +++ b/tests/ui/lexer/emoji-in-lifetime.stderr @@ -1,8 +1,23 @@ -error: lifetimes cannot have emoji: `'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ` - --> $DIR/emoji-in-lifetime.rs:3:6 +error: identifiers cannot contain emoji: `'๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ` + --> $DIR/emoji-in-lifetime.rs:3:5 | LL | '๐Ÿ›๐Ÿ›๐Ÿ›family๐Ÿ‘จ๐Ÿ‘ฉ๐Ÿ‘ง๐Ÿ‘ฆ, - | -^^^^^^------^^^^^^^^ + | ^^^^^^^^^^^^^^^^^^^^^ + +error: identifiers cannot contain emoji: `'a๐Ÿ›` + --> $DIR/emoji-in-lifetime.rs:5:5 + | +LL | 'a๐Ÿ›, + | ^^^^ +... +LL | 'a๐Ÿ›: { // pointed at on the error from line 5 + | ^^^^ + +error: identifiers cannot contain emoji: `'1๐Ÿ›` + --> $DIR/emoji-in-lifetime.rs:6:5 + | +LL | '1๐Ÿ›, + | ^^^^ error: lifetimes cannot start with a number: `'12` --> $DIR/emoji-in-lifetime.rs:4:6 @@ -10,29 +25,17 @@ error: lifetimes cannot start with a number: `'12` LL | '12, | -^- -error: lifetimes cannot have emoji: `'a๐Ÿ›` - --> $DIR/emoji-in-lifetime.rs:5:7 - | -LL | 'a๐Ÿ›, - | --^^ - -error: invalid lifetime name: `\'1\u{1f41b}` - --> $DIR/emoji-in-lifetime.rs:6:5 +error: lifetimes cannot start with a number: `'1๐Ÿ›` + --> $DIR/emoji-in-lifetime.rs:6:6 | LL | '1๐Ÿ›, - | ^^^^ + | -^-- error: lifetimes cannot start with a number: `'1` - --> $DIR/emoji-in-lifetime.rs:7:5 + --> $DIR/emoji-in-lifetime.rs:8:5 | LL | '1, | ^^ -error: lifetimes cannot have emoji: `'๐Ÿ›` - --> $DIR/emoji-in-lifetime.rs:17:6 - | -LL | '๐Ÿ›: { - | -^^ - error: aborting due to 6 previous errors From ea749eaa52dea61adc5f6113e6893ec3ec37513c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Wed, 18 Mar 2026 18:38:15 +0000 Subject: [PATCH 4/6] Fix rust-analyzer --- src/tools/rust-analyzer/crates/parser/src/lexed_str.rs | 6 +++--- .../test_data/lexer/err/lifetime_starts_with_a_number.rast | 4 ++-- 
.../test_data/lexer/err/lifetime_starts_with_a_number.txt | 4 ++-- .../test_data/lexer/err/unclosed_char_with_ferris.rast | 2 +- .../rust-analyzer/crates/proc-macro-srv/src/token_stream.rs | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs index d7eec6cde8c01..df7bef5843da8 100644 --- a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs +++ b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs @@ -255,9 +255,9 @@ impl<'a> Converter<'a> { return; } - rustc_lexer::TokenKind::Lifetime { starts_with_number } => { - if *starts_with_number { - errors.push("Lifetime name cannot start with a number".into()); + rustc_lexer::TokenKind::Lifetime { invalid } => { + if *invalid { + errors.push("Lifetime name contains invalid characters".into()); } LIFETIME_IDENT } diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast index e919bf2a4aef2..b2bf087e749f3 100644 --- a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast +++ b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast @@ -1,4 +1,4 @@ -LIFETIME_IDENT "'1" error: Lifetime name cannot start with a number +LIFETIME_IDENT "'1" error: Lifetime name contains invalid characters WHITESPACE "\n" -LIFETIME_IDENT "'1lifetime" error: Lifetime name cannot start with a number +LIFETIME_IDENT "'1lifetime" error: Lifetime name contains invalid characters WHITESPACE "\n" diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt index e919bf2a4aef2..b2bf087e749f3 100644 --- 
a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt +++ b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt @@ -1,4 +1,4 @@ -LIFETIME_IDENT "'1" error: Lifetime name cannot start with a number +LIFETIME_IDENT "'1" error: Lifetime name contains invalid characters WHITESPACE "\n" -LIFETIME_IDENT "'1lifetime" error: Lifetime name cannot start with a number +LIFETIME_IDENT "'1lifetime" error: Lifetime name contains invalid characters WHITESPACE "\n" diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast index 56f19cce0784e..6ace31f434ef2 100644 --- a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast +++ b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast @@ -1 +1 @@ -CHAR "'🦀" error: Missing trailing `'` symbol to terminate the character literal +LIFETIME_IDENT "'🦀" error: Lifetime name contains invalid characters diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs index 2358f6963c79e..d6773153ed988 100644 --- a/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs +++ b/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs @@ -302,9 +302,9 @@ impl TokenStream { span: span.derive_ranged(range), })) } - rustc_lexer::TokenKind::Lifetime { starts_with_number } => { - if starts_with_number { - return Err("Lifetime cannot start with a number".to_owned()); + rustc_lexer::TokenKind::Lifetime { invalid } => { + if invalid { + return Err(format!("Invalid lifetime identifier: `{}`", &s[range])); } let range = range.start + 1..range.end; tokenstream.push(TokenTree::Punct(Punct { From 956917dd6d995708cef69747d6c602effff86076 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Esteban=20K=C3=BCber?= Date: Wed, 18 Mar 2026 18:39:42 +0000 Subject: [PATCH 5/6] remove whitespace --- tests/ui/lexer/emoji-in-lifetime.rs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/ui/lexer/emoji-in-lifetime.rs b/tests/ui/lexer/emoji-in-lifetime.rs index 3442a9380abe9..e4a12f332ae70 100644 --- a/tests/ui/lexer/emoji-in-lifetime.rs +++ b/tests/ui/lexer/emoji-in-lifetime.rs @@ -9,11 +9,6 @@ fn bad_lifetime_name< 'aโ€Œb // bare zero-width-joiners are accepted as XID_Continue >() {} - - - - - fn main() { 'a๐Ÿ›: { // pointed at on the error from line 5 todo!(); From 4f1676d11c2a13d69ae5e40fc649785b1b4645e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Esteban=20K=C3=BCber?= Date: Wed, 18 Mar 2026 20:04:17 +0000 Subject: [PATCH 6/6] fix test --- compiler/rustc_lexer/src/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs index a7357ba38c8e4..f74ba7f532a68 100644 --- a/compiler/rustc_lexer/src/tests.rs +++ b/compiler/rustc_lexer/src/tests.rs @@ -231,7 +231,7 @@ fn lifetime() { "'abc", FrontmatterAllowed::No, expect![[r#" - Token { kind: Lifetime { starts_with_number: false }, len: 4 } + Token { kind: Lifetime { invalid: false }, len: 4 } "#]], ); }