Auto merge of #149192 - gmorenz:normalize_lifetimes, r=madsmtm

NFC normalize lifetime identifiers

Fixes rust-lang/rust#126759
This commit is contained in:
bors 2025-12-13 21:47:35 +00:00
commit 2cd4ee6bcf
2 changed files with 25 additions and 7 deletions

View file

@ -316,7 +316,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
let lifetime_name = nfc_normalize(self.str_from(start));
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
if starts_with_number {
let span = self.mk_sp(start, self.pos);
@ -325,8 +325,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
.with_span(span)
.stash(span, StashKey::LifetimeIsChar);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident, IdentIsRaw::No)
token::Lifetime(lifetime_name, IdentIsRaw::No)
}
rustc_lexer::TokenKind::RawLifetime => {
self.last_lifetime = Some(self.mk_sp(start, start + BytePos(1)));
@ -373,7 +372,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
String::with_capacity(lifetime_name_without_tick.as_str().len() + 1);
lifetime_name.push('\'');
lifetime_name += lifetime_name_without_tick.as_str();
let sym = Symbol::intern(&lifetime_name);
let sym = nfc_normalize(&lifetime_name);
// Make sure we mark this as a raw identifier.
self.psess.raw_identifier_spans.push(span);
@ -393,9 +392,8 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
self.pos = lt_start;
self.cursor = Cursor::new(&str_before[2 as usize..], FrontmatterAllowed::No);
let lifetime_name = self.str_from(start);
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident, IdentIsRaw::No)
let lifetime_name = nfc_normalize(self.str_from(start));
token::Lifetime(lifetime_name, IdentIsRaw::No)
}
}
rustc_lexer::TokenKind::Semi => token::Semi,

View file

@ -0,0 +1,20 @@
//@check-pass
//@edition:2021
#![allow(non_snake_case)]
// Tests that identifiers are NFC-normalized as per
// https://rust-lang.github.io/rfcs/2457-non-ascii-idents.html
// Note that in the first argument of each function `K` is LATIN CAPITAL LETTER K
// and in the second it is `K` (KELVIN SIGN, U+212A).
// Plain identifier: the type parameter is declared with LATIN CAPITAL LETTER K,
// while `_p2` names it with KELVIN SIGN (U+212A). Both must resolve to the same
// identifier once NFC normalization has been applied.
fn ident_nfc<K>(_p1: K, _p2: K) {}
// Raw identifier: `r#K` is written with LATIN CAPITAL LETTER K for `_p1` and with
// KELVIN SIGN (U+212A) for `_p2`. Both must resolve to the type parameter `K`
// once NFC normalization has been applied.
fn raw_ident_nfc<K>(_p1: r#K, _p2: r#K) {}
// Lifetime: declared as `'K` with LATIN CAPITAL LETTER K and used for `_p2` with
// KELVIN SIGN (U+212A). The use only resolves to the declared lifetime if
// lifetime identifiers are NFC-normalized as well.
fn lifetime_nfc<'K>(_p1: &'K str, _p2: &'K str) {}
// Raw lifetime: `'r#K` is written with LATIN CAPITAL LETTER K for `_p1` and with
// KELVIN SIGN (U+212A) for `_p2`. Both must resolve to the declared lifetime `'K`
// once NFC normalization has been applied. Raw lifetimes need edition 2021 or
// later, which the `//@edition:2021` directive at the top of this file selects.
fn raw_lifetime_nfc<'K>(_p1: &'r#K str, _p2: &'r#K str) {}
// Intentionally empty: this is a `check-pass` test, so the signatures above
// only need to compile; nothing has to run.
fn main() {}