Deduplicate test and primary range_search definitions
This ensures that what we test is what we get for final results as well.
This commit is contained in:
parent
7b29b70d6e
commit
5f71d98f90
4 changed files with 103 additions and 101 deletions
|
|
@ -32,48 +32,3 @@ pub use unicode_data::lowercase::lookup as Lowercase;
|
|||
pub use unicode_data::n::lookup as N;
|
||||
pub use unicode_data::uppercase::lookup as Uppercase;
|
||||
pub use unicode_data::white_space::lookup as White_Space;
|
||||
|
||||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, and the
/// remaining bits give a left-rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Which word of the virtual bitset holds the needle's bit, which
    // chunk that word lives in, and its position inside the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // Resolve the chunk; the final chunk is stored out-of-line so the
    // in-line map can stay short.
    let chunk_idx;
    if chunk_of_word < N {
        chunk_idx = chunk_idx_map[chunk_of_word];
    } else if chunk_of_word == last_chunk_idx as usize {
        chunk_idx = last_chunk_mapping;
    } else {
        // Past the end of the encoded set: the bit is necessarily unset.
        return false;
    }

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = match word_idx.checked_sub(CANONICAL) {
        // Stored directly among the canonical words.
        None => bitset_canonical[word_idx],
        // Derived from a canonical word via its mapping byte.
        Some(derived) => {
            let (base_idx, mapping) = bitset_canonicalized[derived];
            let mut w = bitset_canonical[base_idx as usize];
            // Bit 6 of the mapping requests bitwise inversion.
            if mapping & (1 << 6) != 0 {
                w = !w;
            }
            // Clear the inversion bit; the rest is the rotation amount.
            let amount = mapping & !(1 << 6);
            w = w.rotate_left(amount as u32);
            w
        }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,54 @@
|
|||
///! This file is generated by src/tools/unicode-table-generator; do not edit manually!
|
||||
use super::range_search;
|
||||
|
||||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, bit 7
/// selects right-shift (set) versus left-rotation (clear), and the low
/// six bits give the shift/rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Which word of the virtual bitset holds the needle's bit, which
    // chunk that word lives in, and its position inside the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // Resolve the chunk; the final chunk is stored out-of-line so the
    // in-line map can stay short.
    let chunk_idx;
    if chunk_of_word < N {
        chunk_idx = chunk_idx_map[chunk_of_word];
    } else if chunk_of_word == last_chunk_idx as usize {
        chunk_idx = last_chunk_mapping;
    } else {
        // Past the end of the encoded set: the bit is necessarily unset.
        return false;
    }

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = match word_idx.checked_sub(CANONICAL) {
        // Stored directly among the canonical words.
        None => bitset_canonical[word_idx],
        // Derived from a canonical word via its mapping byte.
        Some(derived) => {
            let (base_idx, mapping) = bitset_canonicalized[derived];
            let mut w = bitset_canonical[base_idx as usize];
            // Bit 6 of the mapping requests bitwise inversion.
            if mapping & (1 << 6) != 0 {
                w = !w;
            }
            // Low six bits: the shift/rotation amount.
            let amount = mapping & ((1 << 6) - 1);
            // Bit 7 selects shift-right versus rotate-left.
            if mapping & (1 << 7) != 0 {
                w >>= amount as u64;
            } else {
                w = w.rotate_left(amount as u32);
            }
            w
        }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
|
||||
pub const UNICODE_VERSION: (u32, u32, u32) = (13, 0, 0);
|
||||
|
||||
|
|
|
|||
|
|
@ -181,7 +181,10 @@ fn main() {
|
|||
"///! This file is generated by src/tools/unicode-table-generator; do not edit manually!\n",
|
||||
);
|
||||
|
||||
table_file.push_str("use super::range_search;\n\n");
|
||||
// Include the range search function
|
||||
table_file.push('\n');
|
||||
table_file.push_str(include_str!("range_search.rs"));
|
||||
table_file.push('\n');
|
||||
|
||||
table_file.push_str(&version());
|
||||
|
||||
|
|
@ -251,60 +254,6 @@ fn generate_tests(data_path: &str, ranges: &[(&str, Vec<Range<u32>>)]) -> String
|
|||
s.push_str(&format!("#[path = \"{}\"]\n", data_path));
|
||||
s.push_str("mod unicode_data;\n\n");
|
||||
|
||||
s.push_str(
|
||||
"
|
||||
#[inline(always)]
|
||||
fn range_search<
|
||||
const N: usize,
|
||||
const CHUNK_SIZE: usize,
|
||||
const N1: usize,
|
||||
const CANONICAL: usize,
|
||||
const CANONICALIZED: usize,
|
||||
>(
|
||||
needle: u32,
|
||||
chunk_idx_map: &[u8; N],
|
||||
(last_chunk_idx, last_chunk_mapping): (u16, u8),
|
||||
bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
|
||||
bitset_canonical: &[u64; CANONICAL],
|
||||
bitset_canonicalized: &[(u8, u8); CANONICALIZED],
|
||||
) -> bool {
|
||||
let bucket_idx = (needle / 64) as usize;
|
||||
let chunk_map_idx = bucket_idx / CHUNK_SIZE;
|
||||
let chunk_piece = bucket_idx % CHUNK_SIZE;
|
||||
let chunk_idx = if chunk_map_idx >= N {
|
||||
if chunk_map_idx == last_chunk_idx as usize {
|
||||
last_chunk_mapping
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
chunk_idx_map[chunk_map_idx]
|
||||
};
|
||||
let idx = bitset_chunk_idx[(chunk_idx as usize)][chunk_piece] as usize;
|
||||
let word = if idx < CANONICAL {
|
||||
bitset_canonical[idx]
|
||||
} else {
|
||||
let (real_idx, mapping) = bitset_canonicalized[idx - CANONICAL];
|
||||
let mut word = bitset_canonical[real_idx as usize];
|
||||
let should_invert = mapping & (1 << 6) != 0;
|
||||
if should_invert {
|
||||
word = !word;
|
||||
}
|
||||
// Lower 6 bits
|
||||
let quantity = mapping & ((1 << 6) - 1);
|
||||
if mapping & (1 << 7) != 0 {
|
||||
// shift
|
||||
word >>= quantity as u64;
|
||||
} else {
|
||||
word = word.rotate_left(quantity as u32);
|
||||
}
|
||||
word
|
||||
};
|
||||
(word & (1 << (needle % 64) as u64)) != 0
|
||||
}
|
||||
",
|
||||
);
|
||||
|
||||
s.push_str("\nfn main() {\n");
|
||||
|
||||
for (property, ranges) in ranges {
|
||||
|
|
|
|||
49
src/tools/unicode-table-generator/src/range_search.rs
Normal file
49
src/tools/unicode-table-generator/src/range_search.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
/// Tests whether `needle`'s bit is set in the compressed Unicode bitset.
///
/// The (virtual) bitset is a sequence of 64-bit words. `chunk_idx_map`
/// (plus the out-of-line `(last_chunk_idx, last_chunk_mapping)` pair)
/// maps a chunk of words to a row of `bitset_chunk_idx`, which names the
/// stored word. Words indexed at `CANONICAL` or above are derived from a
/// canonical word via a mapping byte: bit 6 requests inversion, bit 7
/// selects right-shift (set) versus left-rotation (clear), and the low
/// six bits give the shift/rotation amount.
#[inline(always)]
fn range_search<
    const N: usize,
    const CHUNK_SIZE: usize,
    const N1: usize,
    const CANONICAL: usize,
    const CANONICALIZED: usize,
>(
    needle: u32,
    chunk_idx_map: &[u8; N],
    (last_chunk_idx, last_chunk_mapping): (u16, u8),
    bitset_chunk_idx: &[[u8; CHUNK_SIZE]; N1],
    bitset_canonical: &[u64; CANONICAL],
    bitset_canonicalized: &[(u8, u8); CANONICALIZED],
) -> bool {
    // Locate the 64-bit word holding the needle's bit, the chunk that
    // word belongs to, and its offset within the chunk.
    let word_pos = (needle / 64) as usize;
    let chunk_of_word = word_pos / CHUNK_SIZE;
    let pos_in_chunk = word_pos % CHUNK_SIZE;

    // The final chunk is stored out-of-line so `chunk_idx_map` can stay
    // short; anything past it is outside the encoded set.
    let chunk_idx = if chunk_of_word < N {
        chunk_idx_map[chunk_of_word]
    } else if chunk_of_word == last_chunk_idx as usize {
        last_chunk_mapping
    } else {
        return false;
    };

    let word_idx = bitset_chunk_idx[chunk_idx as usize][pos_in_chunk] as usize;

    let word = if word_idx < CANONICAL {
        // Stored directly among the canonical words.
        bitset_canonical[word_idx]
    } else {
        // Derived from a canonical word via its mapping byte.
        let (base_idx, mapping) = bitset_canonicalized[word_idx - CANONICAL];
        let mut w = bitset_canonical[base_idx as usize];
        // Bit 6 of the mapping requests bitwise inversion.
        if mapping & (1 << 6) != 0 {
            w = !w;
        }
        // Low six bits: the shift/rotation amount; bit 7 selects
        // shift-right versus rotate-left.
        let amount = mapping & ((1 << 6) - 1);
        if mapping & (1 << 7) != 0 { w >> amount as u64 } else { w.rotate_left(amount as u32) }
    };

    (word & (1 << (needle % 64) as u64)) != 0
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue