rust-lang · estebank · Mar 15, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/Cargo.lock b/Cargo.lock
@@ -4450,6 +4450,7 @@ dependencies = [
  "thin-vec",
  "tracing",
  "unicode-normalization",
+ "unicode-properties",
  "unicode-width 0.2.2",
 ]
 

diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
@@ -140,7 +140,7 @@ pub enum TokenKind {
 
     /// A lifetime, e.g. `'a`.
     Lifetime {
-        starts_with_number: bool,
+        invalid: bool,
     },
 
     /// `;`
@@ -584,7 +584,7 @@ impl<'a> Cursor<'a> {
                     let kind = RawStr { n_hashes: res.ok() };
                     Literal { kind, suffix_start }
                 }
-                _ => self.ident_or_unknown_prefix(),
+                _ => self.ident_or_unknown_prefix(false),
             },
 
             // Byte literal, byte string literal, raw byte string literal or identifier.
@@ -603,7 +603,7 @@ impl<'a> Cursor<'a> {
 
             // Identifier (this should be checked after other variant that can
             // start as identifier).
-            c if is_id_start(c) => self.ident_or_unknown_prefix(),
+            c if is_id_start(c) => self.ident_or_unknown_prefix(false),
 
             // Numeric literal.
             c @ '0'..='9' => {
@@ -661,7 +661,7 @@ impl<'a> Cursor<'a> {
                 Literal { kind, suffix_start }
             }
             // Identifier starting with an emoji. Only lexed for graceful error recovery.
-            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
+            c if is_emoji(c) => self.invalid_ident(),
             _ => Unknown,
         };
         if matches!(self.frontmatter_allowed, FrontmatterAllowed::Yes)
@@ -832,25 +832,22 @@ impl<'a> Cursor<'a> {
         RawIdent
     }
 
-    fn ident_or_unknown_prefix(&mut self) -> TokenKind {
-        debug_assert!(is_id_start(self.prev()));
+    fn ident_or_unknown_prefix(&mut self, already_invalid: bool) -> TokenKind {
+        debug_assert!(is_id_start(self.prev()) || already_invalid);
         // Start is already eaten, eat the rest of identifier.
         self.eat_while(is_id_continue);
         // Known prefixes must have been handled earlier. So if
         // we see a prefix here, it is definitely an unknown prefix.
         match self.first() {
             '#' | '"' | '\'' => UnknownPrefix,
-            c if !c.is_ascii() && c.is_emoji_char() => self.invalid_ident(),
+            c if is_emoji(c) => self.invalid_ident(),
             _ => Ident,
         }
     }
 
     fn invalid_ident(&mut self) -> TokenKind {
         // Start is already eaten, eat the rest of identifier.
-        self.eat_while(|c| {
-            const ZERO_WIDTH_JOINER: char = '\u{200d}';
-            is_id_continue(c) || (!c.is_ascii() && c.is_emoji_char()) || c == ZERO_WIDTH_JOINER
-        });
+        self.eat_while(|c| is_id_continue(c) || is_emoji(c));
         // An invalid identifier followed by '#' or '"' or '\'' could be
         // interpreted as an invalid literal prefix. We don't bother doing that
         // because the treatment of invalid identifiers and invalid prefixes
@@ -895,7 +892,7 @@ impl<'a> Cursor<'a> {
                 let kind = mk_kind_raw(res.ok());
                 Literal { kind, suffix_start }
             }
-            _ => self.ident_or_unknown_prefix(),
+            _ => self.ident_or_unknown_prefix(false),
         }
     }
 
@@ -975,14 +972,18 @@ impl<'a> Cursor<'a> {
     fn lifetime_or_char(&mut self) -> TokenKind {
         debug_assert!(self.prev() == '\'');
 
+        let mut invalid = false;
         let can_be_a_lifetime = if self.second() == '\'' {
             // It's surely not a lifetime.
             false
         } else {
             // If the first symbol is valid for identifier, it can be a lifetime.
             // Also check if it's a number for a better error reporting (so '0 will
             // be reported as invalid lifetime and not as unterminated char literal).
-            is_id_start(self.first()) || self.first().is_ascii_digit()
+            let c = self.first();
+            invalid |= c.is_ascii_digit();
+            invalid |= is_emoji(c);
+            is_id_start(c) || invalid
         };
 
         if !can_be_a_lifetime {
@@ -1012,7 +1013,7 @@ impl<'a> Cursor<'a> {
         // First symbol can be a number (which isn't a valid identifier start),
         // so skip it without any checks.
         self.bump();
-        self.eat_while(is_id_continue);
+        invalid |= matches!(self.ident_or_unknown_prefix(invalid), InvalidIdent);
 
         match self.first() {
             // Check if after skipping literal contents we've met a closing
@@ -1024,7 +1025,7 @@ impl<'a> Cursor<'a> {
                 Literal { kind, suffix_start: self.pos_within_token() }
             }
             '#' if !starts_with_number => UnknownPrefixLifetime,
-            _ => Lifetime { starts_with_number },
+            _ => Lifetime { invalid },
         }
     }
 
@@ -1277,3 +1278,7 @@ impl<'a> Cursor<'a> {
         self.eat_while(is_id_continue);
     }
 }
+
+fn is_emoji(c: char) -> bool {
+    !c.is_ascii() && c.is_emoji_char()
+}
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
@@ -231,7 +231,7 @@ fn lifetime() {
         "'abc",
         FrontmatterAllowed::No,
         expect![[r#"
-            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
+            Token { kind: Lifetime { invalid: false }, len: 4 }
         "#]],
     );
 }

diff --git a/compiler/rustc_parse/Cargo.toml b/compiler/rustc_parse/Cargo.toml
@@ -20,6 +20,7 @@ rustc_span = { path = "../rustc_span" }
 thin-vec = "0.2.12"
 tracing = "0.1"
 unicode-normalization = "0.1.25"
+unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] }
 unicode-width = "0.2.2"
 # tidy-alphabetical-end
 

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
@@ -17,6 +17,7 @@ use rustc_session::lint::builtin::{
 use rustc_session::parse::ParseSess;
 use rustc_span::{BytePos, Pos, Span, Symbol, sym};
 use tracing::debug;
+use unicode_properties::emoji::UnicodeEmoji;
 
 use crate::errors;
 use crate::lexer::diagnostics::TokenTreeDiagInfo;
@@ -316,18 +317,42 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
                     self.lint_literal_unicode_text_flow(symbol, kind, self.mk_sp(start, self.pos), "literal");
                     token::Literal(token::Lit { kind, symbol, suffix })
                 }
-                rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
+                rustc_lexer::TokenKind::Lifetime { invalid } => {
                     // Include the leading `'` in the real identifier, for macro
                     // expansion purposes. See #12512 for the gory details of why
                     // this is necessary.
                     let lifetime_name = nfc_normalize(self.str_from(start));
                     self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
-                    if starts_with_number {
-                        let span = self.mk_sp(start, self.pos);
-                        self.dcx()
-                            .struct_err("lifetimes cannot start with a number")
-                            .with_span(span)
-                            .stash(span, StashKey::LifetimeIsChar);
+                    let span = self.mk_sp(start, self.pos);
+                    if invalid {
+                        let name = lifetime_name.as_str();
+                        // `nth(1)` looks past the leading `'` at the first name character.
+                        let starts_with_number = matches!(
+                            name.chars().nth(1),
+                            Some(c) if c.is_ascii_digit()
+                        );
+                        if name.chars().any(|c| !c.is_ascii() && c.is_emoji_char()) {
+                            self.psess
+                                .bad_unicode_identifiers
+                                .borrow_mut()
+                                .entry(lifetime_name)
+                                .or_default()
+                                .push(span);
+                        }
+                        if starts_with_number {
+                            let mut err = self.dcx()
+                                .struct_err(format!(
+                                    "lifetimes cannot start with a number: `{name}`"
+                                ))
+                                .with_span(span);
+                            if name.len() > 2 {
+                                // Point at the first lifetime name character.
+                                let start_span = self.mk_sp(start + BytePos(1), start + BytePos(2));
+                                err.span(start_span);
+                                err.span_label(span, "");
+                            }
+                            err.stash(span, StashKey::LifetimeIsChar);
+                        }
                     }
                     token::Lifetime(lifetime_name, IdentIsRaw::No)
                 }

diff --git a/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs b/src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
@@ -255,9 +255,9 @@ impl<'a> Converter<'a> {
                     return;
                 }
 
-                rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
-                    if *starts_with_number {
-                        errors.push("Lifetime name cannot start with a number".into());
+                rustc_lexer::TokenKind::Lifetime { invalid } => {
+                    if *invalid {
+                        errors.push("Lifetime name contains invalid characters".into());
                     }
                     LIFETIME_IDENT
                 }

diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.rast
@@ -1,4 +1,4 @@
-LIFETIME_IDENT "'1" error: Lifetime name cannot start with a number
+LIFETIME_IDENT "'1" error: Lifetime name contains invalid characters
 WHITESPACE "\n"
-LIFETIME_IDENT "'1lifetime" error: Lifetime name cannot start with a number
+LIFETIME_IDENT "'1lifetime" error: Lifetime name contains invalid characters
 WHITESPACE "\n"
diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/lifetime_starts_with_a_number.txt
@@ -1,4 +1,4 @@
-LIFETIME_IDENT "'1" error: Lifetime name cannot start with a number
+LIFETIME_IDENT "'1" error: Lifetime name contains invalid characters
 WHITESPACE "\n"
-LIFETIME_IDENT "'1lifetime" error: Lifetime name cannot start with a number
+LIFETIME_IDENT "'1lifetime" error: Lifetime name contains invalid characters
 WHITESPACE "\n"
diff --git a/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast b/src/tools/rust-analyzer/crates/parser/test_data/lexer/err/unclosed_char_with_ferris.rast
@@ -1 +1 @@
-CHAR "'🦀" error: Missing trailing `'` symbol to terminate the character literal
+LIFETIME_IDENT "'🦀" error: Lifetime name contains invalid characters
diff --git a/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs b/src/tools/rust-analyzer/crates/proc-macro-srv/src/token_stream.rs
@@ -302,9 +302,9 @@ impl<S> TokenStream<S> {
                         span: span.derive_ranged(range),
                     }))
                 }
-                rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
-                    if starts_with_number {
-                        return Err("Lifetime cannot start with a number".to_owned());
+                rustc_lexer::TokenKind::Lifetime { invalid } => {
+                    if invalid {
+                        return Err(format!("Invalid lifetime identifier: `{}`", &s[range]));
                     }
                     let range = range.start + 1..range.end;
                     tokenstream.push(TokenTree::Punct(Punct {

diff --git a/tests/ui/lexer/emoji-in-lifetime.rs b/tests/ui/lexer/emoji-in-lifetime.rs
@@ -0,0 +1,16 @@
+// #141081
+fn bad_lifetime_name<
+    '🐛🐛🐛family👨‍👩‍👧‍👦,//~ ERROR: identifiers cannot contain emoji
+    '12, //~ ERROR: lifetimes cannot start with a number
+    'a🐛, //~ ERROR: identifiers cannot contain emoji
+    '1🐛, //~ ERROR: identifiers cannot contain emoji
+    //~^ ERROR: lifetimes cannot start with a number
+    '1, //~ ERROR: lifetimes cannot start with a number
+    'a‌b // bare zero-width-joiners are accepted as XID_Continue
+>() {}
+
+fn main() {
+    'a🐛: { // pointed at on the error from line 5
+        todo!();
+    };
+}
diff --git a/tests/ui/lexer/emoji-in-lifetime.stderr b/tests/ui/lexer/emoji-in-lifetime.stderr
@@ -0,0 +1,41 @@
+error: identifiers cannot contain emoji: `'🐛🐛🐛family👨👩👧👦`
+  --> $DIR/emoji-in-lifetime.rs:3:5
+   |
+LL |     '🐛🐛🐛family👨👩👧👦,
+   |     ^^^^^^^^^^^^^^^^^^^^^
+
+error: identifiers cannot contain emoji: `'a🐛`
+  --> $DIR/emoji-in-lifetime.rs:5:5
+   |
+LL |     'a🐛,
+   |     ^^^^
+...
+LL |     'a🐛: { // pointed at on the error from line 5
+   |     ^^^^
+
+error: identifiers cannot contain emoji: `'1🐛`
+  --> $DIR/emoji-in-lifetime.rs:6:5
+   |
+LL |     '1🐛,
+   |     ^^^^
+
+error: lifetimes cannot start with a number: `'12`
+  --> $DIR/emoji-in-lifetime.rs:4:6
+   |
+LL |     '12,
+   |     -^-
+
+error: lifetimes cannot start with a number: `'1🐛`
+  --> $DIR/emoji-in-lifetime.rs:6:6
+   |
+LL |     '1🐛,
+   |     -^--
+
+error: lifetimes cannot start with a number: `'1`
+  --> $DIR/emoji-in-lifetime.rs:8:5
+   |
+LL |     '1,
+   |     ^^
+
+error: aborting due to 6 previous errors
+
diff --git a/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr b/tests/ui/lexer/lex-bad-str-literal-as-char-1.stderr
@@ -10,7 +10,7 @@ LL -     println!('1 + 1');
 LL +     println!("1 + 1");
    |
 
-error: lifetimes cannot start with a number
+error: lifetimes cannot start with a number: `'1`
   --> $DIR/lex-bad-str-literal-as-char-1.rs:3:14
    |
 LL |     println!('1 + 1');

diff --git a/tests/ui/parser/numeric-lifetime.stderr b/tests/ui/parser/numeric-lifetime.stderr
@@ -6,13 +6,13 @@ LL |     let x: usize = "";
    |            |
    |            expected due to this
 
-error: lifetimes cannot start with a number
+error: lifetimes cannot start with a number: `'1`
   --> $DIR/numeric-lifetime.rs:1:10
    |
 LL | struct S<'1> { s: &'1 usize }
    |          ^^
 
-error: lifetimes cannot start with a number
+error: lifetimes cannot start with a number: `'1`
   --> $DIR/numeric-lifetime.rs:1:20
    |
 LL | struct S<'1> { s: &'1 usize }
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		CHAR "'🦀" error: Missing trailing `'` symbol to terminate the character literal
		LIFETIME_IDENT "'🦀" error: Lifetime name contains invalid characters