Auto merge of #146173 - Kmeakin:km/unicode-data/no-ascii, r=jhpratt
Don't include ASCII characters in Unicode tables. Split off from https://github.com/rust-lang/rust/pull/145219
This commit is contained in:
commit
beeb8e3af5
7 changed files with 320 additions and 247 deletions
|
|
@ -418,9 +418,8 @@ impl str {
|
|||
}
|
||||
|
||||
fn case_ignorable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool {
|
||||
use core::unicode::{Case_Ignorable, Cased};
|
||||
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
|
||||
Some(c) => Cased(c),
|
||||
match iter.skip_while(|&c| c.is_case_ignorable()).next() {
|
||||
Some(c) => c.is_cased(),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -969,7 +969,43 @@ impl char {
|
|||
#[must_use]
|
||||
#[inline]
|
||||
pub(crate) fn is_grapheme_extended(self) -> bool {
|
||||
unicode::Grapheme_Extend(self)
|
||||
!self.is_ascii() && unicode::Grapheme_Extend(self)
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` has the `Cased` property.
|
||||
///
|
||||
/// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
|
||||
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
#[must_use]
|
||||
#[inline]
|
||||
#[doc(hidden)]
|
||||
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
|
||||
pub fn is_cased(self) -> bool {
|
||||
if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) }
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` has the `Case_Ignorable` property.
|
||||
///
|
||||
/// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
|
||||
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
|
||||
///
|
||||
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
|
||||
/// [ucd]: https://www.unicode.org/reports/tr44/
|
||||
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
|
||||
#[must_use]
|
||||
#[inline]
|
||||
#[doc(hidden)]
|
||||
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
|
||||
pub fn is_case_ignorable(self) -> bool {
|
||||
if self.is_ascii() {
|
||||
matches!(self, '\'' | '.' | ':' | '^' | '`')
|
||||
} else {
|
||||
unicode::Case_Ignorable(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if this `char` has one of the general categories for numbers.
|
||||
|
|
|
|||
|
|
@ -1,16 +1,15 @@
|
|||
//! This file is generated by `./x run src/tools/unicode-table-generator`; do not edit manually!
|
||||
// Alphabetic : 1727 bytes, 142759 codepoints in 757 ranges (U+000041 - U+0323B0) using skiplist
|
||||
// Case_Ignorable : 1053 bytes, 2749 codepoints in 452 ranges (U+000027 - U+0E01F0) using skiplist
|
||||
// Cased : 407 bytes, 4578 codepoints in 159 ranges (U+000041 - U+01F18A) using skiplist
|
||||
// Cc : 9 bytes, 65 codepoints in 2 ranges (U+000000 - U+0000A0) using skiplist
|
||||
// Alphabetic : 1723 bytes, 142707 codepoints in 755 ranges (U+0000AA - U+0323B0) using skiplist
|
||||
// Case_Ignorable : 1043 bytes, 2744 codepoints in 447 ranges (U+0000A8 - U+0E01F0) using skiplist
|
||||
// Cased : 403 bytes, 4526 codepoints in 157 ranges (U+0000AA - U+01F18A) using skiplist
|
||||
// Grapheme_Extend : 887 bytes, 2193 codepoints in 375 ranges (U+000300 - U+0E01F0) using skiplist
|
||||
// Lowercase : 935 bytes, 2569 codepoints in 675 ranges (U+000061 - U+01E944) using bitset
|
||||
// N : 457 bytes, 1911 codepoints in 144 ranges (U+000030 - U+01FBFA) using skiplist
|
||||
// Uppercase : 799 bytes, 1978 codepoints in 656 ranges (U+000041 - U+01F18A) using bitset
|
||||
// White_Space : 256 bytes, 25 codepoints in 10 ranges (U+000009 - U+003001) using cascading
|
||||
// Lowercase : 933 bytes, 2543 codepoints in 674 ranges (U+0000AA - U+01E944) using bitset
|
||||
// N : 455 bytes, 1901 codepoints in 143 ranges (U+0000B2 - U+01FBFA) using skiplist
|
||||
// Uppercase : 797 bytes, 1952 codepoints in 655 ranges (U+0000C0 - U+01F18A) using bitset
|
||||
// White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading
|
||||
// to_lower : 11484 bytes
|
||||
// to_upper : 13432 bytes
|
||||
// Total : 31446 bytes
|
||||
// Total : 31413 bytes
|
||||
|
||||
#[inline(always)]
|
||||
const fn bitset_search<
|
||||
|
|
@ -148,93 +147,100 @@ pub mod alphabetic {
|
|||
use super::ShortOffsetRunHeader;
|
||||
|
||||
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 53] = [
|
||||
ShortOffsetRunHeader::new(0, 706), ShortOffsetRunHeader::new(16, 4681),
|
||||
ShortOffsetRunHeader::new(418, 5741), ShortOffsetRunHeader::new(456, 7958),
|
||||
ShortOffsetRunHeader::new(556, 9398), ShortOffsetRunHeader::new(627, 11264),
|
||||
ShortOffsetRunHeader::new(629, 12293), ShortOffsetRunHeader::new(667, 13312),
|
||||
ShortOffsetRunHeader::new(691, 19904), ShortOffsetRunHeader::new(692, 42125),
|
||||
ShortOffsetRunHeader::new(694, 42509), ShortOffsetRunHeader::new(698, 55204),
|
||||
ShortOffsetRunHeader::new(788, 63744), ShortOffsetRunHeader::new(793, 64110),
|
||||
ShortOffsetRunHeader::new(794, 64830), ShortOffsetRunHeader::new(816, 66176),
|
||||
ShortOffsetRunHeader::new(857, 67383), ShortOffsetRunHeader::new(904, 73440),
|
||||
ShortOffsetRunHeader::new(1221, 74650), ShortOffsetRunHeader::new(1232, 77712),
|
||||
ShortOffsetRunHeader::new(1237, 78896), ShortOffsetRunHeader::new(1240, 82939),
|
||||
ShortOffsetRunHeader::new(1244, 83527), ShortOffsetRunHeader::new(1246, 90368),
|
||||
ShortOffsetRunHeader::new(1247, 92160), ShortOffsetRunHeader::new(1249, 92729),
|
||||
ShortOffsetRunHeader::new(1250, 93504), ShortOffsetRunHeader::new(1265, 100344),
|
||||
ShortOffsetRunHeader::new(1282, 101590), ShortOffsetRunHeader::new(1284, 110576),
|
||||
ShortOffsetRunHeader::new(1287, 110883), ShortOffsetRunHeader::new(1294, 111356),
|
||||
ShortOffsetRunHeader::new(1304, 113664), ShortOffsetRunHeader::new(1305, 119808),
|
||||
ShortOffsetRunHeader::new(1315, 120486), ShortOffsetRunHeader::new(1352, 122624),
|
||||
ShortOffsetRunHeader::new(1375, 123536), ShortOffsetRunHeader::new(1399, 124112),
|
||||
ShortOffsetRunHeader::new(1403, 124896), ShortOffsetRunHeader::new(1409, 126464),
|
||||
ShortOffsetRunHeader::new(1425, 127280), ShortOffsetRunHeader::new(1491, 131072),
|
||||
ShortOffsetRunHeader::new(1497, 173792), ShortOffsetRunHeader::new(1498, 177978),
|
||||
ShortOffsetRunHeader::new(1500, 183970), ShortOffsetRunHeader::new(1504, 191457),
|
||||
ShortOffsetRunHeader::new(1506, 192094), ShortOffsetRunHeader::new(1508, 194560),
|
||||
ShortOffsetRunHeader::new(1509, 195102), ShortOffsetRunHeader::new(1510, 196608),
|
||||
ShortOffsetRunHeader::new(1511, 201547), ShortOffsetRunHeader::new(1512, 205744),
|
||||
ShortOffsetRunHeader::new(1514, 1319856),
|
||||
ShortOffsetRunHeader::new(0, 706), ShortOffsetRunHeader::new(12, 4681),
|
||||
ShortOffsetRunHeader::new(414, 5741), ShortOffsetRunHeader::new(452, 7958),
|
||||
ShortOffsetRunHeader::new(552, 9398), ShortOffsetRunHeader::new(623, 11264),
|
||||
ShortOffsetRunHeader::new(625, 12293), ShortOffsetRunHeader::new(663, 13312),
|
||||
ShortOffsetRunHeader::new(687, 19904), ShortOffsetRunHeader::new(688, 42125),
|
||||
ShortOffsetRunHeader::new(690, 42509), ShortOffsetRunHeader::new(694, 55204),
|
||||
ShortOffsetRunHeader::new(784, 63744), ShortOffsetRunHeader::new(789, 64110),
|
||||
ShortOffsetRunHeader::new(790, 64830), ShortOffsetRunHeader::new(812, 66176),
|
||||
ShortOffsetRunHeader::new(853, 67383), ShortOffsetRunHeader::new(900, 73440),
|
||||
ShortOffsetRunHeader::new(1217, 74650), ShortOffsetRunHeader::new(1228, 77712),
|
||||
ShortOffsetRunHeader::new(1233, 78896), ShortOffsetRunHeader::new(1236, 82939),
|
||||
ShortOffsetRunHeader::new(1240, 83527), ShortOffsetRunHeader::new(1242, 90368),
|
||||
ShortOffsetRunHeader::new(1243, 92160), ShortOffsetRunHeader::new(1245, 92729),
|
||||
ShortOffsetRunHeader::new(1246, 93504), ShortOffsetRunHeader::new(1261, 100344),
|
||||
ShortOffsetRunHeader::new(1278, 101590), ShortOffsetRunHeader::new(1280, 110576),
|
||||
ShortOffsetRunHeader::new(1283, 110883), ShortOffsetRunHeader::new(1290, 111356),
|
||||
ShortOffsetRunHeader::new(1300, 113664), ShortOffsetRunHeader::new(1301, 119808),
|
||||
ShortOffsetRunHeader::new(1311, 120486), ShortOffsetRunHeader::new(1348, 122624),
|
||||
ShortOffsetRunHeader::new(1371, 123536), ShortOffsetRunHeader::new(1395, 124112),
|
||||
ShortOffsetRunHeader::new(1399, 124896), ShortOffsetRunHeader::new(1405, 126464),
|
||||
ShortOffsetRunHeader::new(1421, 127280), ShortOffsetRunHeader::new(1487, 131072),
|
||||
ShortOffsetRunHeader::new(1493, 173792), ShortOffsetRunHeader::new(1494, 177978),
|
||||
ShortOffsetRunHeader::new(1496, 183970), ShortOffsetRunHeader::new(1500, 191457),
|
||||
ShortOffsetRunHeader::new(1502, 192094), ShortOffsetRunHeader::new(1504, 194560),
|
||||
ShortOffsetRunHeader::new(1505, 195102), ShortOffsetRunHeader::new(1506, 196608),
|
||||
ShortOffsetRunHeader::new(1507, 201547), ShortOffsetRunHeader::new(1508, 205744),
|
||||
ShortOffsetRunHeader::new(1510, 1319856),
|
||||
];
|
||||
static OFFSETS: [u8; 1515] = [
|
||||
65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 0, 4, 12, 14, 5, 7, 1, 1, 1, 86, 1, 29,
|
||||
18, 1, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 2, 1, 6, 41,
|
||||
39, 14, 1, 1, 1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3,
|
||||
10, 3, 2, 1, 16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 5, 24, 1, 6,
|
||||
8, 1, 8, 42, 10, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1,
|
||||
3, 4, 3, 8, 2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7,
|
||||
1, 2, 1, 2, 1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7,
|
||||
1, 2, 1, 5, 3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5,
|
||||
3, 8, 2, 2, 2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2,
|
||||
3, 3, 3, 12, 4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8,
|
||||
2, 1, 3, 2, 1, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 6, 2, 1,
|
||||
4, 13, 3, 12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3,
|
||||
24, 1, 9, 1, 1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24,
|
||||
1, 1, 1, 19, 1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, 67,
|
||||
55, 1, 1, 2, 5, 16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2,
|
||||
41, 1, 4, 2, 33, 1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3,
|
||||
0, 2, 17, 1, 26, 5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14,
|
||||
1, 4, 1, 67, 89, 7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4,
|
||||
63, 2, 20, 50, 1, 23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, 11, 14, 55,
|
||||
22, 3, 10, 36, 2, 11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, 0, 2, 6, 2,
|
||||
38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13,
|
||||
5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4,
|
||||
1, 11, 2, 4, 5, 5, 4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1,
|
||||
16, 23, 9, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2,
|
||||
5, 4, 86, 6, 3, 1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, 46, 2, 0, 3, 16,
|
||||
10, 2, 20, 47, 5, 8, 3, 113, 39, 9, 2, 103, 2, 67, 2, 2, 1, 1, 1, 8, 21, 20, 1, 33, 24, 52,
|
||||
12, 68, 1, 1, 44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5,
|
||||
1, 55, 9, 14, 18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7,
|
||||
1, 43, 1, 14, 6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5,
|
||||
1, 1, 1, 2, 1, 2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11,
|
||||
89, 3, 6, 2, 6, 2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0,
|
||||
29, 3, 49, 47, 32, 13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40,
|
||||
8, 52, 12, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24,
|
||||
6, 1, 42, 1, 9, 69, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22,
|
||||
10, 26, 70, 56, 6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, 27, 54,
|
||||
10, 22, 10, 19, 13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, 42, 1, 2,
|
||||
3, 2, 16, 3, 55, 1, 3, 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, 10, 57, 9, 1,
|
||||
13, 25, 23, 51, 17, 4, 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6,
|
||||
4, 62, 7, 1, 1, 1, 4, 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8,
|
||||
2, 2, 2, 2, 3, 1, 6, 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, 1, 1, 4, 1, 2, 3,
|
||||
1, 1, 1, 44, 66, 1, 3, 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1,
|
||||
3, 1, 59, 54, 2, 1, 71, 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30,
|
||||
1, 2, 2, 2, 2, 4, 93, 8, 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 18, 73, 199,
|
||||
33, 31, 9, 1, 45, 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1,
|
||||
1, 2, 2, 24, 6, 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 0, 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0,
|
||||
102, 111, 17, 196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18,
|
||||
48, 16, 4, 31, 21, 5, 19, 0, 45, 211, 64, 128, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 2, 14, 0,
|
||||
8, 0, 41, 10, 0, 4, 1, 7, 1, 2, 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, 13, 3, 9,
|
||||
7, 10, 4, 1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1,
|
||||
7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1,
|
||||
25, 1, 31, 1, 25, 1, 8, 0, 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 112, 45,
|
||||
10, 7, 16, 1, 0, 30, 18, 44, 0, 28, 228, 30, 2, 1, 0, 7, 1, 4, 1, 2, 1, 15, 1, 197, 59, 68,
|
||||
3, 1, 3, 1, 0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1,
|
||||
1, 3, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1,
|
||||
10, 1, 17, 5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 6, 222, 2, 0, 14, 0, 15, 0,
|
||||
0, 0, 0, 0, 5, 0, 0,
|
||||
static OFFSETS: [u8; 1511] = [
|
||||
170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 0, 4, 12, 14, 5, 7, 1, 1, 1, 86, 1, 29, 18, 1, 2, 2,
|
||||
4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 2, 1, 6, 41, 39, 14, 1, 1,
|
||||
1, 2, 1, 2, 1, 1, 8, 27, 4, 4, 29, 11, 5, 56, 1, 7, 14, 102, 1, 8, 4, 8, 4, 3, 10, 3, 2, 1,
|
||||
16, 48, 13, 101, 24, 33, 9, 2, 4, 1, 5, 24, 2, 19, 19, 25, 7, 11, 5, 24, 1, 6, 8, 1, 8, 42,
|
||||
10, 12, 3, 7, 6, 76, 1, 16, 1, 3, 4, 15, 13, 19, 1, 8, 2, 2, 2, 22, 1, 7, 1, 1, 3, 4, 3, 8,
|
||||
2, 2, 2, 2, 1, 1, 8, 1, 4, 2, 1, 5, 12, 2, 10, 1, 4, 3, 1, 6, 4, 2, 2, 22, 1, 7, 1, 2, 1, 2,
|
||||
1, 2, 4, 5, 4, 2, 2, 2, 4, 1, 7, 4, 1, 1, 17, 6, 11, 3, 1, 9, 1, 3, 1, 22, 1, 7, 1, 2, 1, 5,
|
||||
3, 9, 1, 3, 1, 2, 3, 1, 15, 4, 21, 4, 4, 3, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2,
|
||||
2, 2, 9, 2, 4, 2, 1, 5, 13, 1, 16, 2, 1, 6, 3, 3, 1, 4, 3, 2, 1, 1, 1, 2, 3, 2, 3, 3, 3, 12,
|
||||
4, 5, 3, 3, 1, 3, 3, 1, 6, 1, 40, 13, 1, 3, 1, 23, 1, 16, 3, 8, 1, 3, 1, 3, 8, 2, 1, 3, 2,
|
||||
1, 2, 4, 28, 4, 1, 8, 1, 3, 1, 23, 1, 10, 1, 5, 3, 8, 1, 3, 1, 3, 8, 2, 6, 2, 1, 4, 13, 3,
|
||||
12, 13, 1, 3, 1, 41, 2, 8, 1, 3, 1, 3, 1, 1, 5, 4, 7, 5, 22, 6, 1, 3, 1, 18, 3, 24, 1, 9, 1,
|
||||
1, 2, 7, 8, 6, 1, 1, 1, 8, 18, 2, 13, 58, 5, 7, 6, 1, 51, 2, 1, 1, 1, 5, 1, 24, 1, 1, 1, 19,
|
||||
1, 3, 2, 5, 1, 1, 6, 1, 14, 4, 32, 1, 63, 8, 1, 36, 4, 19, 4, 16, 1, 36, 67, 55, 1, 1, 2, 5,
|
||||
16, 64, 10, 4, 2, 38, 1, 1, 5, 1, 2, 43, 1, 0, 1, 4, 2, 7, 1, 1, 1, 4, 2, 41, 1, 4, 2, 33,
|
||||
1, 4, 2, 7, 1, 1, 1, 4, 2, 15, 1, 57, 1, 4, 2, 67, 37, 16, 16, 86, 2, 6, 3, 0, 2, 17, 1, 26,
|
||||
5, 75, 3, 11, 7, 20, 11, 21, 12, 20, 12, 13, 1, 3, 1, 2, 12, 52, 2, 19, 14, 1, 4, 1, 67, 89,
|
||||
7, 43, 5, 70, 10, 31, 1, 12, 4, 9, 23, 30, 2, 5, 11, 44, 4, 26, 54, 28, 4, 63, 2, 20, 50, 1,
|
||||
23, 2, 11, 3, 49, 52, 1, 15, 1, 8, 51, 42, 2, 4, 10, 44, 1, 11, 14, 55, 22, 3, 10, 36, 2,
|
||||
11, 5, 43, 2, 3, 41, 4, 1, 6, 1, 2, 3, 1, 5, 192, 19, 34, 11, 0, 2, 6, 2, 38, 2, 6, 2, 8, 1,
|
||||
1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116, 1,
|
||||
13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 11, 2, 4, 5, 5,
|
||||
4, 1, 17, 41, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 2, 56, 7, 1, 16, 23, 9, 7, 1,
|
||||
7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 7, 1, 32, 47, 1, 0, 3, 25, 9, 7, 5, 2, 5, 4, 86, 6, 3,
|
||||
1, 90, 1, 4, 5, 43, 1, 94, 17, 32, 48, 16, 0, 0, 64, 0, 67, 46, 2, 0, 3, 16, 10, 2, 20, 47,
|
||||
5, 8, 3, 113, 39, 9, 2, 103, 2, 67, 2, 2, 1, 1, 1, 8, 21, 20, 1, 33, 24, 52, 12, 68, 1, 1,
|
||||
44, 6, 3, 1, 1, 3, 10, 33, 5, 35, 13, 29, 3, 51, 1, 12, 15, 1, 16, 16, 10, 5, 1, 55, 9, 14,
|
||||
18, 23, 3, 69, 1, 1, 1, 1, 24, 3, 2, 16, 2, 4, 11, 6, 2, 6, 2, 6, 9, 7, 1, 7, 1, 43, 1, 14,
|
||||
6, 123, 21, 0, 12, 23, 4, 49, 0, 0, 2, 106, 38, 7, 12, 5, 5, 12, 1, 13, 1, 5, 1, 1, 1, 2, 1,
|
||||
2, 1, 108, 33, 0, 18, 64, 2, 54, 40, 12, 116, 5, 1, 135, 36, 26, 6, 26, 11, 89, 3, 6, 2, 6,
|
||||
2, 6, 2, 3, 35, 12, 1, 26, 1, 19, 1, 2, 1, 15, 2, 14, 34, 123, 69, 53, 0, 29, 3, 49, 47, 32,
|
||||
13, 30, 5, 43, 5, 30, 2, 36, 4, 8, 1, 5, 42, 158, 18, 36, 4, 36, 4, 40, 8, 52, 12, 11, 1,
|
||||
15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 3, 52, 12, 0, 9, 22, 10, 8, 24, 6, 1, 42, 1, 9,
|
||||
69, 6, 2, 1, 1, 44, 1, 2, 3, 1, 2, 23, 10, 23, 9, 31, 65, 19, 1, 2, 10, 22, 10, 26, 70, 56,
|
||||
6, 2, 64, 4, 1, 2, 5, 8, 1, 3, 1, 29, 42, 29, 3, 29, 35, 8, 1, 28, 27, 54, 10, 22, 10, 19,
|
||||
13, 18, 110, 73, 55, 51, 13, 51, 13, 40, 34, 28, 3, 1, 5, 23, 250, 42, 1, 2, 3, 2, 16, 3,
|
||||
55, 1, 3, 29, 10, 1, 8, 22, 42, 18, 46, 21, 27, 23, 9, 70, 43, 5, 10, 57, 9, 1, 13, 25, 23,
|
||||
51, 17, 4, 8, 35, 3, 1, 9, 64, 1, 4, 9, 2, 10, 1, 1, 1, 35, 18, 1, 34, 2, 1, 6, 4, 62, 7, 1,
|
||||
1, 1, 4, 1, 15, 1, 10, 7, 57, 23, 4, 1, 8, 2, 2, 2, 22, 1, 7, 1, 2, 1, 5, 3, 8, 2, 2, 2, 2,
|
||||
3, 1, 6, 1, 5, 7, 28, 10, 1, 1, 2, 1, 1, 38, 1, 10, 1, 1, 2, 1, 1, 4, 1, 2, 3, 1, 1, 1, 44,
|
||||
66, 1, 3, 1, 4, 20, 3, 30, 66, 2, 2, 1, 1, 184, 54, 2, 7, 25, 6, 34, 63, 1, 1, 3, 1, 59, 54,
|
||||
2, 1, 71, 27, 2, 14, 21, 7, 185, 57, 103, 64, 31, 8, 2, 1, 2, 8, 1, 2, 1, 30, 1, 2, 2, 2, 2,
|
||||
4, 93, 8, 2, 46, 2, 6, 1, 1, 1, 2, 27, 51, 2, 10, 17, 72, 5, 1, 18, 73, 199, 33, 31, 9, 1,
|
||||
45, 1, 7, 1, 1, 49, 30, 2, 22, 1, 14, 73, 7, 1, 2, 1, 44, 3, 1, 1, 2, 1, 3, 1, 1, 2, 2, 24,
|
||||
6, 1, 2, 1, 37, 1, 2, 1, 4, 1, 1, 0, 23, 9, 17, 1, 41, 3, 3, 111, 1, 79, 0, 102, 111, 17,
|
||||
196, 0, 97, 15, 0, 17, 6, 25, 0, 5, 0, 0, 47, 0, 0, 7, 31, 17, 79, 17, 30, 18, 48, 16, 4,
|
||||
31, 21, 5, 19, 0, 45, 211, 64, 128, 75, 4, 57, 7, 17, 64, 2, 1, 1, 12, 2, 14, 0, 8, 0, 41,
|
||||
10, 0, 4, 1, 7, 1, 2, 1, 0, 15, 1, 29, 3, 2, 1, 14, 4, 8, 0, 0, 107, 5, 13, 3, 9, 7, 10, 4,
|
||||
1, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28,
|
||||
1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31,
|
||||
1, 25, 1, 8, 0, 31, 6, 6, 213, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 112, 45, 10, 7, 16,
|
||||
1, 0, 30, 18, 44, 0, 28, 228, 30, 2, 1, 0, 7, 1, 4, 1, 2, 1, 15, 1, 197, 59, 68, 3, 1, 3, 1,
|
||||
0, 4, 1, 27, 1, 2, 1, 1, 2, 1, 1, 10, 1, 4, 1, 1, 1, 1, 6, 1, 4, 1, 1, 1, 1, 1, 1, 3, 1, 2,
|
||||
1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 4, 1, 7, 1, 4, 1, 4, 1, 1, 1, 10, 1, 17,
|
||||
5, 3, 1, 5, 1, 17, 0, 26, 6, 26, 6, 26, 0, 0, 32, 0, 6, 222, 2, 0, 14, 0, 15, 0, 0, 0, 0, 0,
|
||||
5, 0, 0,
|
||||
];
|
||||
/// Reports whether `c` is listed in this module's (`alphabetic`) range table.
///
/// The caller is expected to pass a non-ASCII `c`; that expectation is only
/// checked in debug builds.
#[inline]
|
||||
pub fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
// 0xaa is the first code point the generated header lists for Alphabetic, so
// anything below it is rejected here without the out-of-line `lookup_slow` call.
(c as u32) >= 0xaa && lookup_slow(c)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_slow(c: char) -> bool {
|
||||
const {
|
||||
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
|
||||
let mut i = 0;
|
||||
|
|
@ -254,62 +260,69 @@ pub mod case_ignorable {
|
|||
use super::ShortOffsetRunHeader;
|
||||
|
||||
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 37] = [
|
||||
ShortOffsetRunHeader::new(0, 688), ShortOffsetRunHeader::new(21, 4957),
|
||||
ShortOffsetRunHeader::new(273, 5906), ShortOffsetRunHeader::new(275, 8125),
|
||||
ShortOffsetRunHeader::new(385, 11388), ShortOffsetRunHeader::new(419, 12293),
|
||||
ShortOffsetRunHeader::new(431, 40981), ShortOffsetRunHeader::new(443, 42232),
|
||||
ShortOffsetRunHeader::new(445, 42508), ShortOffsetRunHeader::new(447, 64286),
|
||||
ShortOffsetRunHeader::new(543, 65024), ShortOffsetRunHeader::new(547, 66045),
|
||||
ShortOffsetRunHeader::new(577, 67456), ShortOffsetRunHeader::new(583, 68097),
|
||||
ShortOffsetRunHeader::new(589, 68900), ShortOffsetRunHeader::new(601, 69291),
|
||||
ShortOffsetRunHeader::new(609, 71727), ShortOffsetRunHeader::new(733, 71995),
|
||||
ShortOffsetRunHeader::new(737, 72752), ShortOffsetRunHeader::new(765, 73459),
|
||||
ShortOffsetRunHeader::new(795, 78896), ShortOffsetRunHeader::new(807, 90398),
|
||||
ShortOffsetRunHeader::new(811, 92912), ShortOffsetRunHeader::new(815, 93504),
|
||||
ShortOffsetRunHeader::new(821, 94031), ShortOffsetRunHeader::new(825, 110576),
|
||||
ShortOffsetRunHeader::new(833, 113821), ShortOffsetRunHeader::new(839, 118528),
|
||||
ShortOffsetRunHeader::new(843, 119143), ShortOffsetRunHeader::new(847, 121344),
|
||||
ShortOffsetRunHeader::new(857, 122880), ShortOffsetRunHeader::new(869, 123566),
|
||||
ShortOffsetRunHeader::new(885, 124139), ShortOffsetRunHeader::new(889, 125136),
|
||||
ShortOffsetRunHeader::new(893, 127995), ShortOffsetRunHeader::new(897, 917505),
|
||||
ShortOffsetRunHeader::new(899, 2032112),
|
||||
ShortOffsetRunHeader::new(0, 688), ShortOffsetRunHeader::new(11, 4957),
|
||||
ShortOffsetRunHeader::new(263, 5906), ShortOffsetRunHeader::new(265, 8125),
|
||||
ShortOffsetRunHeader::new(375, 11388), ShortOffsetRunHeader::new(409, 12293),
|
||||
ShortOffsetRunHeader::new(421, 40981), ShortOffsetRunHeader::new(433, 42232),
|
||||
ShortOffsetRunHeader::new(435, 42508), ShortOffsetRunHeader::new(437, 64286),
|
||||
ShortOffsetRunHeader::new(533, 65024), ShortOffsetRunHeader::new(537, 66045),
|
||||
ShortOffsetRunHeader::new(567, 67456), ShortOffsetRunHeader::new(573, 68097),
|
||||
ShortOffsetRunHeader::new(579, 68900), ShortOffsetRunHeader::new(591, 69291),
|
||||
ShortOffsetRunHeader::new(599, 71727), ShortOffsetRunHeader::new(723, 71995),
|
||||
ShortOffsetRunHeader::new(727, 72752), ShortOffsetRunHeader::new(755, 73459),
|
||||
ShortOffsetRunHeader::new(785, 78896), ShortOffsetRunHeader::new(797, 90398),
|
||||
ShortOffsetRunHeader::new(801, 92912), ShortOffsetRunHeader::new(805, 93504),
|
||||
ShortOffsetRunHeader::new(811, 94031), ShortOffsetRunHeader::new(815, 110576),
|
||||
ShortOffsetRunHeader::new(823, 113821), ShortOffsetRunHeader::new(829, 118528),
|
||||
ShortOffsetRunHeader::new(833, 119143), ShortOffsetRunHeader::new(837, 121344),
|
||||
ShortOffsetRunHeader::new(847, 122880), ShortOffsetRunHeader::new(859, 123566),
|
||||
ShortOffsetRunHeader::new(875, 124139), ShortOffsetRunHeader::new(879, 125136),
|
||||
ShortOffsetRunHeader::new(883, 127995), ShortOffsetRunHeader::new(887, 917505),
|
||||
ShortOffsetRunHeader::new(889, 2032112),
|
||||
];
|
||||
static OFFSETS: [u8; 905] = [
|
||||
39, 1, 6, 1, 11, 1, 35, 1, 1, 1, 71, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2,
|
||||
1, 1, 251, 7, 207, 1, 5, 1, 49, 45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35,
|
||||
1, 10, 21, 16, 1, 101, 8, 1, 10, 1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24,
|
||||
24, 43, 3, 44, 1, 7, 2, 5, 9, 41, 58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1,
|
||||
58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2, 2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2,
|
||||
57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6, 1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1,
|
||||
61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3, 5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1,
|
||||
5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1, 20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9,
|
||||
98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1, 1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1,
|
||||
102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3, 16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3,
|
||||
29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1, 2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118,
|
||||
3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58, 1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31,
|
||||
49, 4, 48, 1, 1, 5, 1, 1, 5, 1, 40, 9, 12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3,
|
||||
58, 8, 2, 2, 64, 6, 82, 3, 1, 13, 1, 7, 4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1,
|
||||
3, 11, 3, 13, 3, 13, 3, 13, 2, 12, 5, 8, 2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1,
|
||||
16, 13, 51, 33, 0, 2, 113, 3, 125, 1, 15, 1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6,
|
||||
93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1, 1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3,
|
||||
2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2, 26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4,
|
||||
2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12, 1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2,
|
||||
1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9, 3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3,
|
||||
1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1, 11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3,
|
||||
0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0, 3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1,
|
||||
26, 5, 1, 1, 0, 2, 79, 4, 70, 11, 49, 4, 123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2,
|
||||
1, 4, 1, 10, 1, 50, 3, 36, 5, 1, 8, 62, 1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6,
|
||||
1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 3, 1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, 85,
|
||||
8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4, 2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2,
|
||||
106, 1, 1, 1, 2, 6, 1, 1, 101, 1, 1, 1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2,
|
||||
2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1, 9, 6, 2, 3, 46, 13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2,
|
||||
7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1, 7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1,
|
||||
1, 1, 23, 1, 0, 17, 6, 15, 0, 12, 3, 3, 0, 5, 59, 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2,
|
||||
1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4, 0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55,
|
||||
4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0, 7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0,
|
||||
1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8, 0, 5, 0, 1, 30, 96, 128, 240, 0,
|
||||
static OFFSETS: [u8; 895] = [
|
||||
168, 1, 4, 1, 1, 1, 4, 1, 2, 2, 0, 192, 4, 2, 4, 1, 9, 2, 1, 1, 251, 7, 207, 1, 5, 1, 49,
|
||||
45, 1, 1, 1, 2, 1, 2, 1, 1, 44, 1, 11, 6, 10, 11, 1, 1, 35, 1, 10, 21, 16, 1, 101, 8, 1, 10,
|
||||
1, 4, 33, 1, 1, 1, 30, 27, 91, 11, 58, 11, 4, 1, 2, 1, 24, 24, 43, 3, 44, 1, 7, 2, 5, 9, 41,
|
||||
58, 55, 1, 1, 1, 4, 8, 4, 1, 3, 7, 10, 2, 13, 1, 15, 1, 58, 1, 4, 4, 8, 1, 20, 2, 26, 1, 2,
|
||||
2, 57, 1, 4, 2, 4, 2, 2, 3, 3, 1, 30, 2, 3, 1, 11, 2, 57, 1, 4, 5, 1, 2, 4, 1, 20, 2, 22, 6,
|
||||
1, 1, 58, 1, 2, 1, 1, 4, 8, 1, 7, 2, 11, 2, 30, 1, 61, 1, 12, 1, 50, 1, 3, 1, 55, 1, 1, 3,
|
||||
5, 3, 1, 4, 7, 2, 11, 2, 29, 1, 58, 1, 2, 1, 6, 1, 5, 2, 20, 2, 28, 2, 57, 2, 4, 4, 8, 1,
|
||||
20, 2, 29, 1, 72, 1, 7, 3, 1, 1, 90, 1, 2, 7, 11, 9, 98, 1, 2, 9, 9, 1, 1, 7, 73, 2, 27, 1,
|
||||
1, 1, 1, 1, 55, 14, 1, 5, 1, 2, 5, 11, 1, 36, 9, 1, 102, 4, 1, 6, 1, 2, 2, 2, 25, 2, 4, 3,
|
||||
16, 4, 13, 1, 2, 2, 6, 1, 15, 1, 94, 1, 0, 3, 0, 3, 29, 2, 30, 2, 30, 2, 64, 2, 1, 7, 8, 1,
|
||||
2, 11, 3, 1, 5, 1, 45, 5, 51, 1, 65, 2, 34, 1, 118, 3, 4, 2, 9, 1, 6, 3, 219, 2, 2, 1, 58,
|
||||
1, 1, 7, 1, 1, 1, 1, 2, 8, 6, 10, 2, 1, 39, 1, 8, 31, 49, 4, 48, 1, 1, 5, 1, 1, 5, 1, 40, 9,
|
||||
12, 2, 32, 4, 2, 2, 1, 3, 56, 1, 1, 2, 3, 1, 1, 3, 58, 8, 2, 2, 64, 6, 82, 3, 1, 13, 1, 7,
|
||||
4, 1, 6, 1, 3, 2, 50, 63, 13, 1, 34, 101, 0, 1, 1, 3, 11, 3, 13, 3, 13, 3, 13, 2, 12, 5, 8,
|
||||
2, 10, 1, 2, 1, 2, 5, 49, 5, 1, 10, 1, 1, 13, 1, 16, 13, 51, 33, 0, 2, 113, 3, 125, 1, 15,
|
||||
1, 96, 32, 47, 1, 0, 1, 36, 4, 3, 5, 5, 1, 93, 6, 93, 3, 0, 1, 0, 6, 0, 1, 98, 4, 1, 10, 1,
|
||||
1, 28, 4, 80, 2, 14, 34, 78, 1, 23, 3, 103, 3, 3, 2, 8, 1, 3, 1, 4, 1, 25, 2, 5, 1, 151, 2,
|
||||
26, 18, 13, 1, 38, 8, 25, 11, 46, 3, 48, 1, 2, 4, 2, 2, 17, 1, 21, 2, 66, 6, 2, 2, 2, 2, 12,
|
||||
1, 8, 1, 35, 1, 11, 1, 51, 1, 1, 3, 2, 2, 5, 2, 1, 1, 27, 1, 14, 2, 5, 2, 1, 1, 100, 5, 9,
|
||||
3, 121, 1, 2, 1, 4, 1, 0, 1, 147, 17, 0, 16, 3, 1, 12, 16, 34, 1, 2, 1, 169, 1, 7, 1, 6, 1,
|
||||
11, 1, 35, 1, 1, 1, 47, 1, 45, 2, 67, 1, 21, 3, 0, 1, 226, 1, 149, 5, 0, 6, 1, 42, 1, 9, 0,
|
||||
3, 1, 2, 5, 4, 40, 3, 4, 1, 165, 2, 0, 4, 38, 1, 26, 5, 1, 1, 0, 2, 79, 4, 70, 11, 49, 4,
|
||||
123, 1, 54, 15, 41, 1, 2, 2, 10, 3, 49, 4, 2, 2, 2, 1, 4, 1, 10, 1, 50, 3, 36, 5, 1, 8, 62,
|
||||
1, 12, 2, 52, 9, 10, 4, 2, 1, 95, 3, 2, 1, 1, 2, 6, 1, 2, 1, 157, 1, 3, 8, 21, 2, 57, 2, 3,
|
||||
1, 37, 7, 3, 5, 70, 6, 13, 1, 1, 1, 1, 1, 14, 2, 85, 8, 2, 3, 1, 1, 23, 1, 84, 6, 1, 1, 4,
|
||||
2, 1, 2, 238, 4, 6, 2, 1, 2, 27, 2, 85, 8, 2, 1, 1, 2, 106, 1, 1, 1, 2, 6, 1, 1, 101, 1, 1,
|
||||
1, 2, 4, 1, 5, 0, 9, 1, 2, 0, 2, 1, 1, 4, 1, 144, 4, 2, 2, 4, 1, 32, 10, 40, 6, 2, 4, 8, 1,
|
||||
9, 6, 2, 3, 46, 13, 1, 2, 0, 7, 1, 6, 1, 1, 82, 22, 2, 7, 1, 2, 1, 2, 122, 6, 3, 1, 1, 2, 1,
|
||||
7, 1, 1, 72, 2, 3, 1, 1, 1, 0, 2, 11, 2, 52, 5, 5, 1, 1, 1, 23, 1, 0, 17, 6, 15, 0, 12, 3,
|
||||
3, 0, 5, 59, 7, 9, 4, 0, 3, 40, 2, 0, 1, 63, 17, 64, 2, 1, 2, 0, 4, 1, 7, 1, 2, 0, 2, 1, 4,
|
||||
0, 46, 2, 23, 0, 3, 9, 16, 2, 7, 30, 4, 148, 3, 0, 55, 4, 50, 8, 1, 14, 1, 22, 5, 1, 15, 0,
|
||||
7, 1, 17, 2, 7, 1, 2, 1, 5, 5, 62, 33, 1, 160, 14, 0, 1, 61, 4, 0, 5, 254, 2, 0, 7, 109, 8,
|
||||
0, 5, 0, 1, 30, 96, 128, 240, 0,
|
||||
];
|
||||
/// Reports whether `c` is listed in this module's (`case_ignorable`) range table.
///
/// The caller is expected to pass a non-ASCII `c`; that expectation is only
/// checked in debug builds.
#[inline]
|
||||
pub fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
// 0xa8 is the first code point the generated header lists for Case_Ignorable, so
// anything below it is rejected here without the out-of-line `lookup_slow` call.
(c as u32) >= 0xa8 && lookup_slow(c)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_slow(c: char) -> bool {
|
||||
const {
|
||||
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
|
||||
let mut i = 0;
|
||||
|
|
@ -329,33 +342,40 @@ pub mod cased {
|
|||
use super::ShortOffsetRunHeader;
|
||||
|
||||
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 22] = [
|
||||
ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(55, 5024),
|
||||
ShortOffsetRunHeader::new(65, 7296), ShortOffsetRunHeader::new(69, 7958),
|
||||
ShortOffsetRunHeader::new(78, 9398), ShortOffsetRunHeader::new(153, 11264),
|
||||
ShortOffsetRunHeader::new(155, 42560), ShortOffsetRunHeader::new(167, 43824),
|
||||
ShortOffsetRunHeader::new(187, 64256), ShortOffsetRunHeader::new(193, 65313),
|
||||
ShortOffsetRunHeader::new(197, 66560), ShortOffsetRunHeader::new(201, 67456),
|
||||
ShortOffsetRunHeader::new(223, 68736), ShortOffsetRunHeader::new(231, 71840),
|
||||
ShortOffsetRunHeader::new(239, 93760), ShortOffsetRunHeader::new(241, 119808),
|
||||
ShortOffsetRunHeader::new(243, 120486), ShortOffsetRunHeader::new(280, 122624),
|
||||
ShortOffsetRunHeader::new(303, 122928), ShortOffsetRunHeader::new(309, 125184),
|
||||
ShortOffsetRunHeader::new(311, 127280), ShortOffsetRunHeader::new(313, 1241482),
|
||||
ShortOffsetRunHeader::new(0, 4256), ShortOffsetRunHeader::new(51, 5024),
|
||||
ShortOffsetRunHeader::new(61, 7296), ShortOffsetRunHeader::new(65, 7958),
|
||||
ShortOffsetRunHeader::new(74, 9398), ShortOffsetRunHeader::new(149, 11264),
|
||||
ShortOffsetRunHeader::new(151, 42560), ShortOffsetRunHeader::new(163, 43824),
|
||||
ShortOffsetRunHeader::new(183, 64256), ShortOffsetRunHeader::new(189, 65313),
|
||||
ShortOffsetRunHeader::new(193, 66560), ShortOffsetRunHeader::new(197, 67456),
|
||||
ShortOffsetRunHeader::new(219, 68736), ShortOffsetRunHeader::new(227, 71840),
|
||||
ShortOffsetRunHeader::new(235, 93760), ShortOffsetRunHeader::new(237, 119808),
|
||||
ShortOffsetRunHeader::new(239, 120486), ShortOffsetRunHeader::new(276, 122624),
|
||||
ShortOffsetRunHeader::new(299, 122928), ShortOffsetRunHeader::new(305, 125184),
|
||||
ShortOffsetRunHeader::new(307, 127280), ShortOffsetRunHeader::new(309, 1241482),
|
||||
];
|
||||
static OFFSETS: [u8; 319] = [
|
||||
65, 26, 6, 26, 47, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5,
|
||||
96, 1, 42, 4, 2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9,
|
||||
41, 0, 38, 1, 1, 5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6,
|
||||
2, 38, 2, 6, 2, 8, 1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4,
|
||||
13, 5, 3, 1, 7, 116, 1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1,
|
||||
4, 1, 6, 4, 1, 2, 4, 5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5,
|
||||
1, 0, 46, 18, 30, 132, 102, 3, 4, 1, 62, 2, 2, 1, 1, 1, 8, 21, 5, 1, 3, 0, 43, 1, 14, 6, 80,
|
||||
0, 7, 12, 5, 0, 26, 6, 26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15,
|
||||
1, 7, 1, 2, 0, 1, 2, 3, 1, 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 0, 85, 1,
|
||||
71, 1, 2, 2, 1, 2, 2, 2, 4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5,
|
||||
1, 1, 3, 7, 1, 0, 2, 25, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1,
|
||||
8, 0, 10, 1, 20, 6, 6, 0, 62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
|
||||
static OFFSETS: [u8; 315] = [
|
||||
170, 1, 10, 1, 4, 1, 5, 23, 1, 31, 1, 195, 1, 4, 4, 208, 1, 36, 7, 2, 30, 5, 96, 1, 42, 4,
|
||||
2, 2, 2, 4, 1, 1, 6, 1, 1, 3, 1, 1, 1, 20, 1, 83, 1, 139, 8, 166, 1, 38, 9, 41, 0, 38, 1, 1,
|
||||
5, 1, 2, 43, 1, 4, 0, 86, 2, 6, 0, 11, 5, 43, 2, 3, 64, 192, 64, 0, 2, 6, 2, 38, 2, 6, 2, 8,
|
||||
1, 1, 1, 1, 1, 1, 1, 31, 2, 53, 1, 7, 1, 1, 3, 3, 1, 7, 3, 4, 2, 6, 4, 13, 5, 3, 1, 7, 116,
|
||||
1, 13, 1, 16, 13, 101, 1, 4, 1, 2, 10, 1, 1, 3, 5, 6, 1, 1, 1, 1, 1, 1, 4, 1, 6, 4, 1, 2, 4,
|
||||
5, 5, 4, 1, 17, 32, 3, 2, 0, 52, 0, 229, 6, 4, 3, 2, 12, 38, 1, 1, 5, 1, 0, 46, 18, 30, 132,
|
||||
102, 3, 4, 1, 62, 2, 2, 1, 1, 1, 8, 21, 5, 1, 3, 0, 43, 1, 14, 6, 80, 0, 7, 12, 5, 0, 26, 6,
|
||||
26, 0, 80, 96, 36, 4, 36, 116, 11, 1, 15, 1, 7, 1, 2, 1, 11, 1, 15, 1, 7, 1, 2, 0, 1, 2, 3,
|
||||
1, 42, 1, 9, 0, 51, 13, 51, 93, 22, 10, 22, 0, 64, 0, 64, 0, 85, 1, 71, 1, 2, 2, 1, 2, 2, 2,
|
||||
4, 1, 12, 1, 1, 1, 7, 1, 65, 1, 4, 2, 8, 1, 7, 1, 28, 1, 4, 1, 5, 1, 1, 3, 7, 1, 0, 2, 25,
|
||||
1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 31, 1, 25, 1, 8, 0, 10, 1, 20, 6, 6, 0,
|
||||
62, 0, 68, 0, 26, 6, 26, 6, 26, 0,
|
||||
];
|
||||
#[inline]
|
||||
pub fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
(c as u32) >= 0xaa && lookup_slow(c)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_slow(c: char) -> bool {
|
||||
const {
|
||||
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
|
||||
let mut i = 0;
|
||||
|
|
@ -424,6 +444,7 @@ pub mod grapheme_extend {
|
|||
];
|
||||
#[inline]
|
||||
pub fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
(c as u32) >= 0x300 && lookup_slow(c)
|
||||
}
|
||||
|
||||
|
|
@ -446,7 +467,7 @@ pub mod grapheme_extend {
|
|||
#[rustfmt::skip]
|
||||
pub mod lowercase {
|
||||
static BITSET_CHUNKS_MAP: [u8; 123] = [
|
||||
14, 17, 0, 0, 9, 0, 0, 12, 13, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
12, 17, 0, 0, 9, 0, 0, 13, 14, 10, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 4, 1, 0, 15, 0, 8, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0,
|
||||
|
|
@ -458,37 +479,37 @@ pub mod lowercase {
|
|||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 14, 56, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0, 0, 0, 0, 0, 67, 43, 0, 52, 48, 50, 33],
|
||||
[0, 0, 0, 0, 10, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 9, 57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 3, 0, 16, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27],
|
||||
[0, 0, 0, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 71, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 0, 34, 17, 23, 53, 54, 49, 47, 7, 35, 42, 0, 28, 12, 31],
|
||||
[0, 0, 46, 0, 56, 56, 56, 0, 22, 22, 69, 22, 36, 25, 24, 37],
|
||||
[0, 5, 70, 0, 29, 15, 75, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[0, 66, 34, 17, 23, 53, 54, 49, 47, 8, 35, 42, 0, 28, 13, 31],
|
||||
[11, 60, 0, 6, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 32, 0],
|
||||
[10, 60, 0, 6, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 32, 0],
|
||||
[16, 26, 22, 38, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[16, 51, 2, 21, 68, 9, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[16, 51, 2, 21, 68, 8, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[16, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
[65, 41, 55, 12, 77, 63, 18, 1, 7, 64, 76, 20, 73, 74, 4, 45],
|
||||
[65, 41, 55, 11, 66, 63, 18, 13, 1, 64, 76, 20, 73, 74, 4, 45],
|
||||
];
|
||||
static BITSET_CANONICAL: [u64; 56] = [
|
||||
0b0000000000000000000000000000000000000000000000000000000000000000,
|
||||
0b1111111111111111110000000000000000000000000011111111111111111111,
|
||||
0b0000111111111111111111111111110000000000000000000000000011111111,
|
||||
0b1010101010101010101010101010101010101010101010101010100000000010,
|
||||
0b0000000000000111111111111111111111111111111111111111111111111111,
|
||||
0b1111111111111111111111000000000000000000000000001111110111111111,
|
||||
0b1000000000000010000000000000000000000000000000000000000000000000,
|
||||
0b0000111111111111111111111111111111111111000000000000000000000000,
|
||||
0b0000111111111111111111111111110000000000000000000000000011111111,
|
||||
0b1111111111111111111111111111111111111111111111111010101010000101,
|
||||
0b1111111111111111111111111111111100000000000000000000000000000000,
|
||||
0b1111111111111111111111111111110000000000000000000000000000000000,
|
||||
0b1111111111111111111111110000000000000000000000000000000000000000,
|
||||
0b1111111111111111111111000000000000000000000000001111111111101111,
|
||||
0b1111111111111111111100000000000000000000000000010000000000000000,
|
||||
0b1111111111111111110000000000000000000000000011111111111111111111,
|
||||
0b1111111111111111000000111111111111110111111111111111111111111111,
|
||||
0b1111111111111111000000000000000000000000000000000100001111000000,
|
||||
0b1111111111111111000000000000000000000000000000000000000000000000,
|
||||
|
|
@ -532,13 +553,15 @@ pub mod lowercase {
|
|||
0b1110011001010001001011010010101001001110001001000011000100101001,
|
||||
0b1110101111000000000000000000000000001111111111111111111111111100,
|
||||
];
|
||||
static BITSET_MAPPING: [(u8, u8); 22] = [
|
||||
(0, 64), (1, 188), (1, 186), (1, 183), (1, 176), (1, 109), (1, 124), (1, 126), (1, 66),
|
||||
(1, 70), (1, 77), (2, 146), (2, 144), (2, 83), (3, 93), (3, 147), (3, 133), (4, 12), (4, 6),
|
||||
(5, 187), (6, 78), (7, 132),
|
||||
static BITSET_MAPPING: [(u8, u8); 21] = [
|
||||
(0, 64), (1, 184), (1, 182), (1, 179), (1, 172), (1, 161), (1, 146), (1, 144), (1, 140),
|
||||
(1, 136), (1, 132), (2, 146), (2, 144), (2, 83), (3, 93), (3, 147), (3, 133), (4, 12),
|
||||
(4, 6), (5, 187), (6, 78),
|
||||
];
|
||||
|
||||
pub const fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
(c as u32) >= 0xaa &&
|
||||
super::bitset_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
|
|
@ -554,43 +577,50 @@ pub mod n {
|
|||
use super::ShortOffsetRunHeader;
|
||||
|
||||
static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 42] = [
|
||||
ShortOffsetRunHeader::new(0, 1632), ShortOffsetRunHeader::new(9, 2406),
|
||||
ShortOffsetRunHeader::new(15, 4160), ShortOffsetRunHeader::new(49, 4969),
|
||||
ShortOffsetRunHeader::new(53, 5870), ShortOffsetRunHeader::new(55, 6470),
|
||||
ShortOffsetRunHeader::new(63, 8304), ShortOffsetRunHeader::new(79, 9312),
|
||||
ShortOffsetRunHeader::new(89, 10102), ShortOffsetRunHeader::new(93, 11517),
|
||||
ShortOffsetRunHeader::new(95, 12295), ShortOffsetRunHeader::new(97, 12690),
|
||||
ShortOffsetRunHeader::new(103, 42528), ShortOffsetRunHeader::new(115, 43056),
|
||||
ShortOffsetRunHeader::new(119, 44016), ShortOffsetRunHeader::new(131, 65296),
|
||||
ShortOffsetRunHeader::new(133, 65799), ShortOffsetRunHeader::new(135, 66273),
|
||||
ShortOffsetRunHeader::new(141, 67672), ShortOffsetRunHeader::new(153, 68858),
|
||||
ShortOffsetRunHeader::new(183, 69216), ShortOffsetRunHeader::new(189, 70736),
|
||||
ShortOffsetRunHeader::new(209, 71248), ShortOffsetRunHeader::new(213, 71904),
|
||||
ShortOffsetRunHeader::new(221, 72688), ShortOffsetRunHeader::new(225, 73552),
|
||||
ShortOffsetRunHeader::new(233, 74752), ShortOffsetRunHeader::new(237, 90416),
|
||||
ShortOffsetRunHeader::new(239, 92768), ShortOffsetRunHeader::new(241, 93552),
|
||||
ShortOffsetRunHeader::new(249, 93824), ShortOffsetRunHeader::new(251, 118000),
|
||||
ShortOffsetRunHeader::new(253, 119488), ShortOffsetRunHeader::new(255, 120782),
|
||||
ShortOffsetRunHeader::new(261, 123200), ShortOffsetRunHeader::new(263, 123632),
|
||||
ShortOffsetRunHeader::new(265, 124144), ShortOffsetRunHeader::new(267, 125127),
|
||||
ShortOffsetRunHeader::new(271, 126065), ShortOffsetRunHeader::new(275, 127232),
|
||||
ShortOffsetRunHeader::new(285, 130032), ShortOffsetRunHeader::new(287, 1244154),
|
||||
ShortOffsetRunHeader::new(0, 1632), ShortOffsetRunHeader::new(7, 2406),
|
||||
ShortOffsetRunHeader::new(13, 4160), ShortOffsetRunHeader::new(47, 4969),
|
||||
ShortOffsetRunHeader::new(51, 5870), ShortOffsetRunHeader::new(53, 6470),
|
||||
ShortOffsetRunHeader::new(61, 8304), ShortOffsetRunHeader::new(77, 9312),
|
||||
ShortOffsetRunHeader::new(87, 10102), ShortOffsetRunHeader::new(91, 11517),
|
||||
ShortOffsetRunHeader::new(93, 12295), ShortOffsetRunHeader::new(95, 12690),
|
||||
ShortOffsetRunHeader::new(101, 42528), ShortOffsetRunHeader::new(113, 43056),
|
||||
ShortOffsetRunHeader::new(117, 44016), ShortOffsetRunHeader::new(129, 65296),
|
||||
ShortOffsetRunHeader::new(131, 65799), ShortOffsetRunHeader::new(133, 66273),
|
||||
ShortOffsetRunHeader::new(139, 67672), ShortOffsetRunHeader::new(151, 68858),
|
||||
ShortOffsetRunHeader::new(181, 69216), ShortOffsetRunHeader::new(187, 70736),
|
||||
ShortOffsetRunHeader::new(207, 71248), ShortOffsetRunHeader::new(211, 71904),
|
||||
ShortOffsetRunHeader::new(219, 72688), ShortOffsetRunHeader::new(223, 73552),
|
||||
ShortOffsetRunHeader::new(231, 74752), ShortOffsetRunHeader::new(235, 90416),
|
||||
ShortOffsetRunHeader::new(237, 92768), ShortOffsetRunHeader::new(239, 93552),
|
||||
ShortOffsetRunHeader::new(247, 93824), ShortOffsetRunHeader::new(249, 118000),
|
||||
ShortOffsetRunHeader::new(251, 119488), ShortOffsetRunHeader::new(253, 120782),
|
||||
ShortOffsetRunHeader::new(259, 123200), ShortOffsetRunHeader::new(261, 123632),
|
||||
ShortOffsetRunHeader::new(263, 124144), ShortOffsetRunHeader::new(265, 125127),
|
||||
ShortOffsetRunHeader::new(269, 126065), ShortOffsetRunHeader::new(273, 127232),
|
||||
ShortOffsetRunHeader::new(283, 130032), ShortOffsetRunHeader::new(285, 1244154),
|
||||
];
|
||||
static OFFSETS: [u8; 289] = [
|
||||
48, 10, 120, 2, 5, 1, 2, 3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118,
|
||||
10, 118, 10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10,
|
||||
70, 20, 0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10,
|
||||
182, 10, 86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0,
|
||||
1, 0, 1, 25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6,
|
||||
154, 10, 38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4,
|
||||
29, 1, 8, 1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9,
|
||||
52, 2, 30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112,
|
||||
7, 134, 30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, 20,
|
||||
76, 12, 0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 0, 10, 102, 21, 0, 111, 0, 10, 0, 10,
|
||||
86, 10, 134, 10, 1, 7, 0, 10, 0, 23, 0, 10, 0, 20, 12, 20, 108, 25, 0, 50, 0, 10, 0, 10, 0,
|
||||
10, 247, 10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
|
||||
static OFFSETS: [u8; 287] = [
|
||||
178, 2, 5, 1, 2, 3, 0, 10, 134, 10, 198, 10, 0, 10, 118, 10, 4, 6, 108, 10, 118, 10, 118,
|
||||
10, 2, 6, 110, 13, 115, 10, 8, 7, 103, 10, 104, 7, 7, 19, 109, 10, 96, 10, 118, 10, 70, 20,
|
||||
0, 10, 70, 10, 0, 20, 0, 3, 239, 10, 6, 10, 22, 10, 0, 10, 128, 11, 165, 10, 6, 10, 182, 10,
|
||||
86, 10, 134, 10, 6, 10, 0, 1, 3, 6, 6, 10, 198, 51, 2, 5, 0, 60, 78, 22, 0, 30, 0, 1, 0, 1,
|
||||
25, 9, 14, 3, 0, 4, 138, 10, 30, 8, 1, 15, 32, 10, 39, 15, 0, 10, 188, 10, 0, 6, 154, 10,
|
||||
38, 10, 198, 10, 22, 10, 86, 10, 0, 10, 0, 10, 0, 45, 12, 57, 17, 2, 0, 27, 36, 4, 29, 1, 8,
|
||||
1, 134, 5, 202, 10, 0, 8, 25, 7, 39, 9, 75, 5, 22, 6, 160, 2, 2, 16, 2, 46, 64, 9, 52, 2,
|
||||
30, 3, 75, 5, 104, 8, 24, 8, 41, 7, 0, 6, 48, 10, 6, 10, 0, 31, 158, 10, 42, 4, 112, 7, 134,
|
||||
30, 128, 10, 60, 10, 144, 10, 7, 20, 251, 10, 0, 10, 118, 10, 0, 10, 102, 10, 6, 20, 76, 12,
|
||||
0, 19, 93, 10, 0, 10, 86, 29, 227, 10, 70, 10, 0, 10, 102, 21, 0, 111, 0, 10, 0, 10, 86, 10,
|
||||
134, 10, 1, 7, 0, 10, 0, 23, 0, 10, 0, 20, 12, 20, 108, 25, 0, 50, 0, 10, 0, 10, 0, 10, 247,
|
||||
10, 0, 9, 128, 10, 0, 59, 1, 3, 1, 4, 76, 45, 1, 15, 0, 13, 0, 10, 0,
|
||||
];
|
||||
#[inline]
|
||||
pub fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
(c as u32) >= 0xb2 && lookup_slow(c)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn lookup_slow(c: char) -> bool {
|
||||
const {
|
||||
assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
|
||||
let mut i = 0;
|
||||
|
|
@ -608,34 +638,34 @@ pub mod n {
|
|||
#[rustfmt::skip]
|
||||
pub mod uppercase {
|
||||
static BITSET_CHUNKS_MAP: [u8; 125] = [
|
||||
12, 15, 6, 6, 0, 6, 6, 2, 4, 11, 6, 16, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 5, 6, 14, 6, 10, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 13, 6, 6,
|
||||
6, 6, 9, 6, 3,
|
||||
3, 14, 6, 6, 0, 6, 6, 2, 5, 12, 6, 15, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 9, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 6, 6, 7, 6, 13, 6, 11, 6, 6, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
||||
6, 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 16, 6, 6,
|
||||
6, 6, 10, 6, 4,
|
||||
];
|
||||
static BITSET_INDEX_CHUNKS: [[u8; 16]; 17] = [
|
||||
[44, 44, 5, 35, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 5, 1],
|
||||
[44, 44, 5, 35, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 5, 0],
|
||||
[44, 44, 5, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 40, 44, 44, 44, 44, 44, 17, 17, 63, 17, 43, 29, 24, 23],
|
||||
[44, 44, 40, 44, 44, 44, 44, 44, 17, 17, 62, 17, 43, 29, 24, 23],
|
||||
[44, 44, 44, 32, 36, 21, 22, 15, 13, 34, 44, 44, 44, 11, 30, 39],
|
||||
[44, 44, 44, 44, 9, 8, 45, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 37, 28, 67, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 0, 44, 44, 44],
|
||||
[44, 44, 44, 44, 37, 28, 66, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 55, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 62, 61, 44, 20, 14, 16, 4],
|
||||
[44, 44, 44, 44, 56, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 59, 44, 44, 31, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 60, 46, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 49, 44, 32, 36, 21, 22, 15, 13, 34, 44, 44, 44, 11, 30, 39],
|
||||
[52, 54, 26, 50, 12, 7, 25, 51, 41, 53, 6, 3, 66, 65, 64, 68],
|
||||
[57, 44, 9, 47, 44, 42, 33, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[58, 19, 2, 18, 10, 48, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[58, 38, 17, 27, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 57, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 49, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 44, 44, 44, 44, 44, 44, 44, 61, 60, 44, 20, 14, 16, 4],
|
||||
[44, 44, 44, 44, 50, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 53, 44, 44, 31, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[44, 44, 54, 46, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[51, 44, 9, 47, 44, 42, 33, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[52, 19, 2, 18, 10, 48, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[52, 38, 17, 27, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44],
|
||||
[58, 1, 26, 55, 12, 7, 25, 56, 41, 59, 6, 3, 65, 64, 63, 67],
|
||||
];
|
||||
static BITSET_CANONICAL: [u64; 44] = [
|
||||
0b0000011111111111111111111111111000000000000000000000000000000000,
|
||||
0b0000000000111111111111111111111111111111111111111111111111111111,
|
||||
0b1111111111111111111111110000000000000000000000000011111111111111,
|
||||
0b0101010101010101010101010101010101010101010101010101010000000001,
|
||||
0b0000011111111111111111111111110000000000000000000000000000000001,
|
||||
0b0000000000100000000000000000000000010101010000010001101011110101,
|
||||
|
|
@ -679,13 +709,15 @@ pub mod uppercase {
|
|||
0b1111011111111111000000000000000000000000000000000000000000000000,
|
||||
0b1111111100000000111111110000000000111111000000001111111100000000,
|
||||
];
|
||||
static BITSET_MAPPING: [(u8, u8); 25] = [
|
||||
(0, 187), (0, 177), (0, 171), (0, 167), (0, 164), (0, 32), (0, 47), (0, 51), (0, 121),
|
||||
(0, 117), (0, 109), (1, 150), (1, 148), (1, 142), (1, 134), (1, 131), (1, 64), (2, 164),
|
||||
(2, 146), (2, 20), (3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
|
||||
static BITSET_MAPPING: [(u8, u8); 24] = [
|
||||
(0, 182), (0, 74), (0, 166), (0, 162), (0, 159), (0, 150), (0, 148), (0, 142), (0, 134),
|
||||
(0, 131), (0, 64), (1, 66), (1, 70), (1, 83), (1, 12), (1, 8), (2, 164), (2, 146), (2, 20),
|
||||
(3, 146), (3, 140), (3, 134), (4, 178), (4, 171),
|
||||
];
|
||||
|
||||
pub const fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
(c as u32) >= 0xc0 &&
|
||||
super::bitset_search(
|
||||
c as u32,
|
||||
&BITSET_CHUNKS_MAP,
|
||||
|
|
@ -699,8 +731,8 @@ pub mod uppercase {
|
|||
#[rustfmt::skip]
|
||||
pub mod white_space {
|
||||
static WHITESPACE_MAP: [u8; 256] = [
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
|
@ -711,6 +743,7 @@ pub mod white_space {
|
|||
];
|
||||
#[inline]
|
||||
pub const fn lookup(c: char) -> bool {
|
||||
debug_assert!(!c.is_ascii());
|
||||
match c as u32 >> 8 {
|
||||
0 => WHITESPACE_MAP[c as usize & 0xff] & 1 != 0,
|
||||
22 => c as u32 == 0x1680,
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ impl RawEmitter {
|
|||
|
||||
writeln!(&mut self.file, "#[inline]").unwrap();
|
||||
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
|
||||
writeln!(&mut self.file, " match c as u32 >> 8 {{").unwrap();
|
||||
for arm in arms {
|
||||
writeln!(&mut self.file, " {arm},").unwrap();
|
||||
|
|
|
|||
|
|
@ -195,6 +195,7 @@ fn load_data() -> UnicodeData {
|
|||
.into_iter()
|
||||
.flatten()
|
||||
.flat_map(|cp| cp.scalar())
|
||||
.filter(|c| !c.is_ascii())
|
||||
.map(u32::from)
|
||||
.collect::<Vec<_>>();
|
||||
(prop, ranges_from_set(&codepoints))
|
||||
|
|
|
|||
|
|
@ -98,6 +98,7 @@ impl RawEmitter {
|
|||
self.blank_line();
|
||||
|
||||
writeln!(&mut self.file, "pub const fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
|
||||
if first_code_point > 0x7f {
|
||||
writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} &&").unwrap();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ impl RawEmitter {
|
|||
if first_code_point > 0x7f {
|
||||
writeln!(&mut self.file, "#[inline]").unwrap();
|
||||
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
|
||||
writeln!(&mut self.file, " (c as u32) >= {first_code_point:#04x} && lookup_slow(c)")
|
||||
.unwrap();
|
||||
writeln!(&mut self.file, "}}").unwrap();
|
||||
|
|
@ -107,6 +108,7 @@ impl RawEmitter {
|
|||
writeln!(&mut self.file, "fn lookup_slow(c: char) -> bool {{").unwrap();
|
||||
} else {
|
||||
writeln!(&mut self.file, "pub fn lookup(c: char) -> bool {{").unwrap();
|
||||
writeln!(&mut self.file, " debug_assert!(!c.is_ascii());").unwrap();
|
||||
}
|
||||
writeln!(&mut self.file, " const {{").unwrap();
|
||||
writeln!(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue