Deduplicate test and primary range_search definitions
This ensures that what we test is what we get for final results as well.
This commit is contained in:
parent
7b29b70d6e
commit
5f71d98f90
4 changed files with 103 additions and 101 deletions
|
|
@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase;
|
|||
pub use unicode_data::n::lookup as N;
|
||||
pub use unicode_data::uppercase::lookup as Uppercase;
|
||||
pub use unicode_data::white_space::lookup as White_Space;
|
||||
|
||||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, and the
/// remaining bits give a left-rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Which word of the virtual bitset holds the needle's bit, which
    // chunk that word lives in, and its position inside the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // Resolve the chunk; the final chunk is stored out-of-line so the
    // in-line map can stay short.
    let chunk_idx;
    if chunk_of_word < N {
        chunk_idx = chunk_idx_map[chunk_of_word];
    } else if chunk_of_word == last_chunk_idx as usize {
        chunk_idx = last_chunk_mapping;
    } else {
        // Past the end of the encoded set: the bit is necessarily unset.
        return false;
    }

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = match word_idx.checked_sub(CANONICAL) {
        // Stored directly among the canonical words.
        None => bitset_canonical[word_idx],
        // Derived from a canonical word via its mapping byte.
        Some(derived) => {
            let (base_idx, mapping) = bitset_canonicalized[derived];
            let mut w = bitset_canonical[base_idx as usize];
            // Bit 6 of the mapping requests bitwise inversion.
            if mapping & (1 << 6) != 0 {
                w = !w;
            }
            // Clear the inversion bit; the rest is the rotation amount.
            let amount = mapping & !(1 << 6);
            w = w.rotate_left(amount as u32);
            w
        }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,54 @@
|
|||
///! This file is generated by src/tools/unicode-table-generator; do not edit manually!
|
||||
use super::range_search;
|
||||
|
||||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, bit 7
/// selects right-shift (set) versus left-rotation (clear), and the low
/// six bits give the shift/rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Which word of the virtual bitset holds the needle's bit, which
    // chunk that word lives in, and its position inside the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // Resolve the chunk; the final chunk is stored out-of-line so the
    // in-line map can stay short.
    let chunk_idx;
    if chunk_of_word < N {
        chunk_idx = chunk_idx_map[chunk_of_word];
    } else if chunk_of_word == last_chunk_idx as usize {
        chunk_idx = last_chunk_mapping;
    } else {
        // Past the end of the encoded set: the bit is necessarily unset.
        return false;
    }

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = match word_idx.checked_sub(CANONICAL) {
        // Stored directly among the canonical words.
        None => bitset_canonical[word_idx],
        // Derived from a canonical word via its mapping byte.
        Some(derived) => {
            let (base_idx, mapping) = bitset_canonicalized[derived];
            let mut w = bitset_canonical[base_idx as usize];
            // Bit 6 of the mapping requests bitwise inversion.
            if mapping & (1 << 6) != 0 {
                w = !w;
            }
            // Low six bits: the shift/rotation amount.
            let amount = mapping & ((1 << 6) - 1);
            // Bit 7 selects shift-right versus rotate-left.
            if mapping & (1 << 7) != 0 {
                w >>= amount as u64;
            } else {
                w = w.rotate_left(amount as u32);
            }
            w
        }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
|
||||
pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,10 @@ fn main() {
|
|||
"///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
|
||||
);
|
||||
|
||||
table_file.push_str("use super::range_search;\n\n");
|
||||
// Include the range search function
|
||||
table_file.push('\n');
|
||||
table_file.push_str(include_str!("range_search.rs"));
|
||||
table_file.push('\n');
|
||||
|
||||
table_file.push_str(&version());
|
||||
|
||||
|
|
@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
|
|||
s.push_str(&format!("#[path = \"{}\"]\n", data_path));
|
||||
s.push_str("mod unicode_data;\n\n");
|
||||
|
||||
s.push_str(
|
||||
"
|
||||
#[inline(always)]
|
||||
fn range_search<
|
||||
const N: usize,
|
||||
const CHUNK_SIZE: usize,
|
||||
const N1: usize,
|
||||
const CANONICAL: usize,
|
||||
const CANONICALIZED: usize,
|
||||
>(
|
||||
needle: u32,
|
||||
chunk_idx_map: &[u8; N],
|
||||
(last_chunk_idx, last_chunk_mapping): (u16, u8),
|
||||
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
|
||||
bitset_canonical: &[u64; CANONICAL],
|
||||
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
|
||||
) -> bool {
|
||||
let bucket_idx = (needle / 64) as usize;
|
||||
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
|
||||
let chunk_piece = bucket_idx % CHUNK_SIZE;
|
||||
let chunk_idx = if chunk_map_idx >= N {
|
||||
if chunk_map_idx == last_chunk_idx as usize {
|
||||
last_chunk_mapping
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
chunk_idx_map[chunk_map_idx]
|
||||
};
|
||||
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
|
||||
let word = if idx < CANONICAL {
|
||||
bitset_canonical[idx]
|
||||
} else {
|
||||
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
||||
let mut word = bitset_canonical[real_idx as usize];
|
||||
let should_invert = mapping & (1 << 6) != 0;
|
||||
if should_invert {
|
||||
word = !word;
|
||||
}
|
||||
// Lower 6 bits
|
||||
let quantity = mapping & ((1 << 6) - 1);
|
||||
if mapping & (1 << 7) != 0 {
|
||||
// shift
|
||||
word >>= quantity as u64;
|
||||
} else {
|
||||
word = word.rotate_left(quantity as u32);
|
||||
}
|
||||
word
|
||||
};
|
||||
(word & (1 << (needle % 64) as u64)) != 0
|
||||
}
|
||||
",
|
||||
);
|
||||
|
||||
s.push_str("\nfn main() {\n");
|
||||
|
||||
for (property, ranges) in ranges {
|
||||
|
|
|
|||
49
src/tools/unicode-table-generator/src/range_search.rs
Normal file
49
src/tools/unicode-table-generator/src/range_search.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, bit 7
/// selects right-shift (set) versus left-rotation (clear), and the low
/// six bits give the shift/rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Locate the 64-bit word holding the needle's bit, the chunk that
    // word belongs to, and its offset within the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // The final chunk is stored out-of-line so `chunk_idx_map` can stay
    // short; anything past it is outside the encoded set.
    let chunk_idx = if chunk_of_word < N {
        chunk_idx_map[chunk_of_word]
    } else if chunk_of_word == last_chunk_idx as usize {
        last_chunk_mapping
    } else {
        return false;
    };

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = if word_idx < CANONICAL {
        // Stored directly among the canonical words.
        bitset_canonical[word_idx]
    } else {
        // Derived from a canonical word via its mapping byte.
        let (base_idx, mapping) = bitset_canonicalized[word_idx - CANONICAL];
        let mut w = bitset_canonical[base_idx as usize];
        // Bit 6 of the mapping requests bitwise inversion.
        if mapping & (1 << 6) != 0 {
            w = !w;
        }
        // Low six bits: the shift/rotation amount; bit 7 selects
        // shift-right versus rotate-left.
        let amount = mapping & ((1 << 6) - 1);
        if mapping & (1 << 7) != 0 { w >> amount as u64 } else { w.rotate_left(amount as u32) }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue