diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 39532166a0b6..94a2507e26cc 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase; pub use unicode_data::n::lookup as N; pub use unicode_data::uppercase::lookup as Uppercase; pub use unicode_data::white_space::lookup as White_Space; - -#[inline(always)] -fn range_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - (last_chunk_idx, last_chunk_mapping): (u16, u8), - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if chunk_map_idx >= N { - if chunk_map_idx == last_chunk_idx as usize { - last_chunk_mapping - } else { - return false; - } - } else { - chunk_idx_map[chunk_map_idx] - }; - let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; - let word = if idx < CANONICAL { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Unset the inversion bit - let rotate_by = mapping & !(1 << 6); - word = word.rotate_left(rotate_by as u32); - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} diff --git a/src/libcore/unicode/unicode_data.rs b/src/libcore/unicode/unicode_data.rs index bae6d8ea9536..5b1efbaa28fe 100644 --- a/src/libcore/unicode/unicode_data.rs +++ b/src/libcore/unicode/unicode_data.rs @@ -1,5 +1,54 @@ ///! This file is generated by src/tools/unicode-table-generator; do not edit manually! -use super::range_search; + +#[inline(always)] +fn range_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + (last_chunk_idx, last_chunk_mapping): (u16, u8), + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + let chunk_idx = if chunk_map_idx >= N { + if chunk_map_idx == last_chunk_idx as usize { + last_chunk_mapping + } else { + return false; + } + } else { + chunk_idx_map[chunk_map_idx] + }; + let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; + let word = if idx < CANONICAL { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +} pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0); diff --git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index 65ece05043a8..af23c166871e 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -181,7 +181,10 @@ fn main() { "///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n", ); - table_file.push_str("use super::range_search;\n\n"); + // Include the range search function + table_file.push('\n'); + table_file.push_str(include_str!("range_search.rs")); + table_file.push('\n'); table_file.push_str(&version()); @@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec>)]) -> String s.push_str(&format!("#[path = \"{}\"]\n", data_path)); s.push_str("mod unicode_data;\n\n"); - s.push_str( - " -#[inline(always)] -fn range_search< - const N: usize, - const CHUNK_SIZE: usize, - const N1: usize, - const CANONICAL: usize, - const CANONICALIZED: usize, ->( - needle: u32, - chunk_idx_map: &[u8; N], - (last_chunk_idx, last_chunk_mapping): (u16, u8), - bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], - bitset_canonical: &[u64; CANONICAL], - bitset_canonicalized: &[(u8, u8); CANONICALIZED], -) -> bool { - let bucket_idx = (needle / 64) as usize; - let chunk_map_idx = bucket_idx / CHUNK_SIZE; - let chunk_piece = bucket_idx % CHUNK_SIZE; - let chunk_idx = if chunk_map_idx >= N { - if chunk_map_idx == last_chunk_idx as usize { - last_chunk_mapping - } else { - return false; - } - } else { - chunk_idx_map[chunk_map_idx] - }; - let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; - let word = if idx < CANONICAL { - bitset_canonical[idx] - } else { - let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; - let mut word = bitset_canonical[real_idx as usize]; - let should_invert = mapping & (1 << 6) != 0; - if should_invert { - word = !word; - } - // Lower 6 bits - let quantity = mapping & ((1 << 6) - 1); - if mapping & (1 << 7) != 0 { - // shift - word >>= quantity as u64; - } else { - word = word.rotate_left(quantity as u32); - } - word - }; - (word & (1 << (needle % 64) as u64)) != 0 -} - ", - ); - s.push_str("\nfn main() {\n"); for (property, ranges) in ranges { diff --git a/src/tools/unicode-table-generator/src/range_search.rs b/src/tools/unicode-table-generator/src/range_search.rs new file mode 100644 index 000000000000..a0bc1e6aec53 --- /dev/null +++ b/src/tools/unicode-table-generator/src/range_search.rs @@ -0,0 +1,49 @@ +#[inline(always)] +fn range_search< + const N: usize, + const CHUNK_SIZE: usize, + const N1: usize, + const CANONICAL: usize, + const CANONICALIZED: usize, +>( + needle: u32, + chunk_idx_map: &[u8; N], + (last_chunk_idx, last_chunk_mapping): (u16, u8), + bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1], + bitset_canonical: &[u64; CANONICAL], + bitset_canonicalized: &[(u8, u8); CANONICALIZED], +) -> bool { + let bucket_idx = (needle / 64) as usize; + let chunk_map_idx = bucket_idx / CHUNK_SIZE; + let chunk_piece = bucket_idx % CHUNK_SIZE; + let chunk_idx = if chunk_map_idx >= N { + if chunk_map_idx == last_chunk_idx as usize { + last_chunk_mapping + } else { + return false; + } + } else { + chunk_idx_map[chunk_map_idx] + }; + let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize; + let word = if idx < CANONICAL { + bitset_canonical[idx] + } else { + let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL]; + let mut word = bitset_canonical[real_idx as usize]; + let should_invert = mapping & (1 << 6) != 0; + if should_invert { + word = !word; + } + // Lower 6 bits + let quantity = mapping & ((1 << 6) - 1); + if mapping & (1 << 7) != 0 { + // shift + word >>= quantity as u64; + } else { + word = word.rotate_left(quantity as u32); + } + word + }; + (word & (1 << (needle % 64) as u64)) != 0 +}