From 08c396b91e245a0c9a596dc3f7053e7cb2038299 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 20 Dec 2025 23:44:06 -0700 Subject: [PATCH] rustdoc: upgrade to stringdex 0.0.4 - code cleanup - smaller encoding for runs - fast path for the common encoding case --- Cargo.lock | 4 +- src/librustdoc/Cargo.toml | 2 +- src/librustdoc/html/static/js/stringdex.js | 139 +++++++++++++++++---- 3 files changed, 119 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1cd998a0bd2f..2c5053f0c2b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5357,9 +5357,9 @@ dependencies = [ [[package]] name = "stringdex" -version = "0.0.3" +version = "0.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "556a6126952cb2f5150057c98a77cc6c771027dea2825bf7fa03d3d638b0a4f8" +checksum = "c6204af9e1e433f1ef9b6d44475c7089be33c91111d896463b9dfa20464b87f1" dependencies = [ "stacker", ] diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index dcfc1ffc251e..ab75d2dfa429 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -22,7 +22,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" smallvec = "1.8.1" -stringdex = "=0.0.3" +stringdex = "=0.0.4" tempfile = "3" threadpool = "1.8.1" tracing = "0.1" diff --git a/src/librustdoc/html/static/js/stringdex.js b/src/librustdoc/html/static/js/stringdex.js index 2a73ae50e720..d04fd6c0e451 100644 --- a/src/librustdoc/html/static/js/stringdex.js +++ b/src/librustdoc/html/static/js/stringdex.js @@ -54,6 +54,52 @@ class RoaringBitmap { } this.consumed_len_bytes = pspecial - i; return this; + } else if (u8array[i] > 0xe0) { + // Special representation of tiny sets that are runs + const lspecial = u8array[i] & 0x0f; + this.keysAndCardinalities = new Uint8Array(lspecial * 4); + i += 1; + const key = u8array[i + 2] | (u8array[i + 3] << 8); + const value = u8array[i] | (u8array[i + 1] << 8); + const container = new RoaringBitmapRun(1, new Uint8Array(4)); + container.array[0] = value & 0xFF; + container.array[1] = (value >> 8) & 0xFF; + container.array[2] = lspecial - 1; + this.containers.push(container); + this.keysAndCardinalities[0] = key & 0xFF; + this.keysAndCardinalities[1] = (key >> 8) & 0xFF; + this.keysAndCardinalities[2] = lspecial - 1; + this.consumed_len_bytes = 5; + return this; + } else if (u8array[i] > 0xd0) { + // Special representation of tiny sets that are close together + const lspecial = u8array[i] & 0x0f; + this.keysAndCardinalities = new Uint8Array(lspecial * 4); + let pspecial = i + 1; + let key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8); + let value = u8array[pspecial] | (u8array[pspecial + 1] << 8); + let entry = (key << 16) | value; + let container; + container = new RoaringBitmapArray(1, new Uint8Array(4)); + container.array[0] = value & 0xFF; + container.array[1] = (value >> 8) & 0xFF; + this.containers.push(container); + this.keysAndCardinalities[0] = key; + this.keysAndCardinalities[1] = key >> 8; + pspecial += 4; + for (let ispecial = 1; ispecial < lspecial; ispecial += 1) { + entry += u8array[pspecial]; + value = entry & 0xffff; + key = entry >> 16; + container = this.addToArrayAt(key); + const cardinalityOld = container.cardinality; + container.array[cardinalityOld * 2] = value & 0xFF; + container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF; + container.cardinality = cardinalityOld + 1; + pspecial += 1; + } + this.consumed_len_bytes = pspecial - i; + return this; } else if (u8array[i] < 0x3a) { // Special representation of tiny sets with arbitrary 32-bit integers const lspecial = u8array[i]; @@ -2282,7 +2328,7 @@ function loadDatabase(hooks) { */ class InlineNeighborsTree { /** - * @param {Uint8Array} encoded + * @param {Uint8Array} encoded * @param {number} start */ constructor( @@ -2301,7 +2347,8 @@ function loadDatabase(hooks) { const has_branches = (encoded[i] & 0x04) !== 0; /** @type {boolean} */ const is_suffixes_only = (encoded[i] & 0x01) !== 0; - let leaves_count = ((encoded[i] >> 4) & 0x0f) + 1; + let leaves_count = ((encoded[i] >> 4) & 0x07) + 1; + let leaves_is_run = (encoded[i] >> 7) !== 0; i += 1; let branch_count = 0; if (has_branches) { @@ -2311,8 +2358,10 @@ function loadDatabase(hooks) { const dlen = encoded[i] & 0x3f; if ((encoded[i] & 0x80) !== 0) { leaves_count = 0; + leaves_is_run = false; } i += 1; + /** @type {Uint8Array} */ let data = EMPTY_UINT8; if (!is_suffixes_only && dlen !== 0) { data = encoded.subarray(i, i + dlen); @@ -2324,8 +2373,10 @@ function loadDatabase(hooks) { const branch_nodes = []; for (let j = 0; j < branch_count; j += 1) { const branch_dlen = encoded[i] & 0x0f; - const branch_leaves_count = ((encoded[i] >> 4) & 0x0f) + 1; + const branch_leaves_count = ((encoded[i] >> 4) & 0x07) + 1; + const branch_leaves_is_run = (encoded[i] >> 7) !== 0; i += 1; + /** @type {Uint8Array} */ let branch_data = EMPTY_UINT8; if (!is_suffixes_only && branch_dlen !== 0) { branch_data = encoded.subarray(i, i + branch_dlen); @@ -2338,13 +2389,28 @@ function loadDatabase(hooks) { (branch_leaves_count - 1) & 0xff, ((branch_leaves_count - 1) >> 8) & 0xff, ); - branch_leaves.containers = [ - new RoaringBitmapArray( - branch_leaves_count, - encoded.subarray(i, i + (branch_leaves_count * 2)), - ), - ]; - i += branch_leaves_count * 2; + if (branch_leaves_is_run) { + branch_leaves.containers = [ + new RoaringBitmapRun( + 1, + Uint8Array.of( + encoded[i], + encoded[i + 1], + branch_leaves_count - 1, + 0, + ), + ), + ]; + i += 2; + } else { + branch_leaves.containers = [ + new RoaringBitmapArray( + branch_leaves_count, + encoded.subarray(i, i + (branch_leaves_count * 2)), + ), + ]; + i += branch_leaves_count * 2; + } branch_nodes.push(Promise.resolve( is_suffixes_only ? new SuffixSearchTree( @@ -2379,13 +2445,28 @@ function loadDatabase(hooks) { (leaves_count - 1) & 0xff, ((leaves_count - 1) >> 8) & 0xff, ); - leaves.containers = [ - new RoaringBitmapArray( - leaves_count, - encoded.subarray(i, i + (leaves_count * 2)), - ), - ]; - i += leaves_count * 2; + if (leaves_is_run) { + leaves.containers = [ + new RoaringBitmapRun( + 1, + Uint8Array.of( + encoded[i], + encoded[i + 1], + leaves_count - 1, + 0, + ), + ), + ]; + i += 2; + } else { + leaves.containers = [ + new RoaringBitmapArray( + leaves_count, + encoded.subarray(i, i + (leaves_count * 2)), + ), + ]; + i += leaves_count * 2; + } } return is_suffixes_only ? new SuffixSearchTree( @@ -2654,7 +2735,7 @@ function loadDatabase(hooks) { /** * @param {string} inputBase64 - * @returns {[Uint8Array, SearchTree]} + * @returns {[Uint8Array, SearchTree]} */ function makeSearchTreeFromBase64(inputBase64) { const input = makeUint8ArrayFromBase64(inputBase64); @@ -2972,7 +3053,10 @@ function loadDatabase(hooks) { // node with packed leaves and common 16bit prefix const leaves_count = no_leaves_flag !== 0 ? 0 : - ((compression_tag >> 4) & 0x0f) + 1; + ((compression_tag >> 4) & 0x07) + 1; + const leaves_is_run = no_leaves_flag !== 0 ? + false : + ((compression_tag >> 4) & 0x08) !== 0; const branch_count = is_long_compressed ? ((compression_tag >> 8) & 0xff) + 1 : 0; @@ -2994,16 +3078,25 @@ function loadDatabase(hooks) { for (let j = 0; j < branch_count; j += 1) { const branch_dlen = input[i] & 0x0f; const branch_leaves_count = ((input[i] >> 4) & 0x0f) + 1; + const branch_leaves_is_run = (input[i] >> 7) !== 0; i += 1; if (!is_pure_suffixes_only_node) { i += branch_dlen; } - i += branch_leaves_count * 2; + if (branch_leaves_is_run) { + i += 2; + } else { + i += branch_leaves_count * 2; + } } // branch keys i += branch_count; // leaves - i += leaves_count * 2; + if (leaves_is_run) { + i += 2; + } else { + i += leaves_count * 2; + } if (is_data_compressed) { const clen = ( 1 + // first compression header byte @@ -3305,7 +3398,7 @@ if (typeof window !== "undefined") { // eslint-disable-next-line max-len // polyfill https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array/fromBase64 /** - * @type {function(string): Uint8Array} base64 + * @type {function(string): Uint8Array} base64 */ //@ts-expect-error const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 : (string => { @@ -3318,7 +3411,7 @@ const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 : return bytes; }); /** - * @type {function(string): Uint8Array} base64 + * @type {function(string): Uint8Array} base64 */ //@ts-expect-error const makeUint8ArrayFromHex = Uint8Array.fromHex ? Uint8Array.fromHex : (string => {