parser/lexer: bump to Unicode 17, use faster unicode-ident
Replace unicode-xid with unicode-ident, which is 6 times faster.
This commit is contained in:
parent
c7aa99f36c
commit
ca64688b37
8 changed files with 22 additions and 20 deletions
14
Cargo.lock
14
Cargo.lock
|
|
@ -4146,8 +4146,8 @@ version = "0.0.0"
|
|||
dependencies = [
|
||||
"expect-test",
|
||||
"memchr",
|
||||
"unicode-ident",
|
||||
"unicode-properties",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5983,24 +5983,24 @@ checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539"
|
|||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.18"
|
||||
version = "1.0.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||
checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
version = "0.1.24"
|
||||
version = "0.1.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956"
|
||||
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
|
||||
dependencies = [
|
||||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-properties"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0"
|
||||
checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-script"
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ Rust lexer used by rustc. No stability guarantees are provided.
|
|||
# Note that this crate purposefully does not depend on other rustc crates
|
||||
[dependencies]
|
||||
memchr = "2.7.6"
|
||||
unicode-properties = { version = "0.1.0", default-features = false, features = ["emoji"] }
|
||||
unicode-xid = "0.2.0"
|
||||
unicode-properties = { version = "0.1.4", default-features = false, features = ["emoji"] }
|
||||
unicode-ident = "1.0.22"
|
||||
|
||||
[dev-dependencies]
|
||||
expect-test = "1.4.0"
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ use LiteralKind::*;
|
|||
use TokenKind::*;
|
||||
use cursor::EOF_CHAR;
|
||||
pub use cursor::{Cursor, FrontmatterAllowed};
|
||||
pub use unicode_ident::UNICODE_VERSION as UNICODE_IDENT_VERSION;
|
||||
use unicode_properties::UnicodeEmoji;
|
||||
pub use unicode_xid::UNICODE_VERSION as UNICODE_XID_VERSION;
|
||||
|
||||
/// Parsed token.
|
||||
/// It doesn't contain information about data that has been parsed,
|
||||
|
|
@ -370,14 +370,14 @@ pub fn is_horizontal_whitespace(c: char) -> bool {
|
|||
/// a formal definition of valid identifier name.
|
||||
pub fn is_id_start(c: char) -> bool {
|
||||
// This is XID_Start OR '_' (which formally is not a XID_Start).
|
||||
c == '_' || unicode_xid::UnicodeXID::is_xid_start(c)
|
||||
c == '_' || unicode_ident::is_xid_start(c)
|
||||
}
|
||||
|
||||
/// True if `c` is valid as a non-first character of an identifier.
|
||||
/// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for
|
||||
/// a formal definition of valid identifier name.
|
||||
pub fn is_id_continue(c: char) -> bool {
|
||||
unicode_xid::UnicodeXID::is_xid_continue(c)
|
||||
unicode_ident::is_xid_continue(c)
|
||||
}
|
||||
|
||||
/// The passed string is lexically an identifier.
|
||||
|
|
|
|||
|
|
@ -20,8 +20,8 @@ rustc_session = { path = "../rustc_session" }
|
|||
rustc_span = { path = "../rustc_span" }
|
||||
thin-vec = "0.2.12"
|
||||
tracing = "0.1"
|
||||
unicode-normalization = "0.1.11"
|
||||
unicode-width = "0.2.0"
|
||||
unicode-normalization = "0.1.25"
|
||||
unicode-width = "0.2.2"
|
||||
# tidy-alphabetical-end
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
|||
|
|
@ -21,5 +21,5 @@ scoped-tls = "1.0"
|
|||
sha1 = "0.10.0"
|
||||
sha2 = "0.10.1"
|
||||
tracing = "0.1"
|
||||
unicode-width = "0.2.0"
|
||||
unicode-width = "0.2.2"
|
||||
# tidy-alphabetical-end
|
||||
|
|
|
|||
|
|
@ -466,7 +466,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
|
|||
"unicode-script",
|
||||
"unicode-security",
|
||||
"unicode-width",
|
||||
"unicode-xid",
|
||||
"utf8parse",
|
||||
"valuable",
|
||||
"version_check",
|
||||
|
|
|
|||
|
|
@ -22,6 +22,9 @@ fn main() {
|
|||
it should also be updated in the reference at \
|
||||
https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md."
|
||||
);
|
||||
println!("Unicode XID version is: {:?}", rustc_lexer::UNICODE_XID_VERSION);
|
||||
println!("Unicode normalization version is: {:?}", rustc_parse::UNICODE_NORMALIZATION_VERSION);
|
||||
println!("Unicode version of unicode-ident is: {:?}", rustc_lexer::UNICODE_IDENT_VERSION);
|
||||
println!(
|
||||
"Unicode version of unicode-normalization is: {:?}",
|
||||
rustc_parse::UNICODE_NORMALIZATION_VERSION
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
Checking if Unicode version changed.
|
||||
If the Unicode version changes are intentional, it should also be updated in the reference at https://github.com/rust-lang/reference/blob/HEAD/src/identifiers.md.
|
||||
Unicode XID version is: (16, 0, 0)
|
||||
Unicode normalization version is: (16, 0, 0)
|
||||
Unicode version of unicode-ident is: (17, 0, 0)
|
||||
Unicode version of unicode-normalization is: (17, 0, 0)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue