rustdoc: upgrade to stringdex 0.0.4

- code cleanup
- smaller encoding for runs
- fast path for the common encoding case
This commit is contained in:
Michael Howell 2025-12-20 23:44:06 -07:00
parent cb79c42008
commit 08c396b91e
3 changed files with 119 additions and 26 deletions

View file

@ -5357,9 +5357,9 @@ dependencies = [
[[package]]
name = "stringdex"
version = "0.0.3"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "556a6126952cb2f5150057c98a77cc6c771027dea2825bf7fa03d3d638b0a4f8"
checksum = "c6204af9e1e433f1ef9b6d44475c7089be33c91111d896463b9dfa20464b87f1"
dependencies = [
"stacker",
]

View file

@ -22,7 +22,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1.8.1"
stringdex = "=0.0.3"
stringdex = "=0.0.4"
tempfile = "3"
threadpool = "1.8.1"
tracing = "0.1"

View file

@ -54,6 +54,52 @@ class RoaringBitmap {
}
this.consumed_len_bytes = pspecial - i;
return this;
} else if (u8array[i] > 0xe0) {
// Special representation of tiny sets that are runs
const lspecial = u8array[i] & 0x0f;
this.keysAndCardinalities = new Uint8Array(lspecial * 4);
i += 1;
const key = u8array[i + 2] | (u8array[i + 3] << 8);
const value = u8array[i] | (u8array[i + 1] << 8);
const container = new RoaringBitmapRun(1, new Uint8Array(4));
container.array[0] = value & 0xFF;
container.array[1] = (value >> 8) & 0xFF;
container.array[2] = lspecial - 1;
this.containers.push(container);
this.keysAndCardinalities[0] = key & 0xFF;
this.keysAndCardinalities[1] = (key >> 8) & 0xFF;
this.keysAndCardinalities[2] = lspecial - 1;
this.consumed_len_bytes = 5;
return this;
} else if (u8array[i] > 0xd0) {
// Special representation of tiny sets that are close together
const lspecial = u8array[i] & 0x0f;
this.keysAndCardinalities = new Uint8Array(lspecial * 4);
let pspecial = i + 1;
let key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);
let value = u8array[pspecial] | (u8array[pspecial + 1] << 8);
let entry = (key << 16) | value;
let container;
container = new RoaringBitmapArray(1, new Uint8Array(4));
container.array[0] = value & 0xFF;
container.array[1] = (value >> 8) & 0xFF;
this.containers.push(container);
this.keysAndCardinalities[0] = key;
this.keysAndCardinalities[1] = key >> 8;
pspecial += 4;
for (let ispecial = 1; ispecial < lspecial; ispecial += 1) {
entry += u8array[pspecial];
value = entry & 0xffff;
key = entry >> 16;
container = this.addToArrayAt(key);
const cardinalityOld = container.cardinality;
container.array[cardinalityOld * 2] = value & 0xFF;
container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF;
container.cardinality = cardinalityOld + 1;
pspecial += 1;
}
this.consumed_len_bytes = pspecial - i;
return this;
} else if (u8array[i] < 0x3a) {
// Special representation of tiny sets with arbitrary 32-bit integers
const lspecial = u8array[i];
@ -2282,7 +2328,7 @@ function loadDatabase(hooks) {
*/
class InlineNeighborsTree {
/**
* @param {Uint8Array<ArrayBuffer>} encoded
* @param {Uint8Array} encoded
* @param {number} start
*/
constructor(
@ -2301,7 +2347,8 @@ function loadDatabase(hooks) {
const has_branches = (encoded[i] & 0x04) !== 0;
/** @type {boolean} */
const is_suffixes_only = (encoded[i] & 0x01) !== 0;
let leaves_count = ((encoded[i] >> 4) & 0x0f) + 1;
let leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
let leaves_is_run = (encoded[i] >> 7) !== 0;
i += 1;
let branch_count = 0;
if (has_branches) {
@ -2311,8 +2358,10 @@ function loadDatabase(hooks) {
const dlen = encoded[i] & 0x3f;
if ((encoded[i] & 0x80) !== 0) {
leaves_count = 0;
leaves_is_run = false;
}
i += 1;
/** @type {Uint8Array} */
let data = EMPTY_UINT8;
if (!is_suffixes_only && dlen !== 0) {
data = encoded.subarray(i, i + dlen);
@ -2324,8 +2373,10 @@ function loadDatabase(hooks) {
const branch_nodes = [];
for (let j = 0; j < branch_count; j += 1) {
const branch_dlen = encoded[i] & 0x0f;
const branch_leaves_count = ((encoded[i] >> 4) & 0x0f) + 1;
const branch_leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
const branch_leaves_is_run = (encoded[i] >> 7) !== 0;
i += 1;
/** @type {Uint8Array} */
let branch_data = EMPTY_UINT8;
if (!is_suffixes_only && branch_dlen !== 0) {
branch_data = encoded.subarray(i, i + branch_dlen);
@ -2338,13 +2389,28 @@ function loadDatabase(hooks) {
(branch_leaves_count - 1) & 0xff,
((branch_leaves_count - 1) >> 8) & 0xff,
);
branch_leaves.containers = [
new RoaringBitmapArray(
branch_leaves_count,
encoded.subarray(i, i + (branch_leaves_count * 2)),
),
];
i += branch_leaves_count * 2;
if (branch_leaves_is_run) {
branch_leaves.containers = [
new RoaringBitmapRun(
1,
Uint8Array.of(
encoded[i],
encoded[i + 1],
branch_leaves_count - 1,
0,
),
),
];
i += 2;
} else {
branch_leaves.containers = [
new RoaringBitmapArray(
branch_leaves_count,
encoded.subarray(i, i + (branch_leaves_count * 2)),
),
];
i += branch_leaves_count * 2;
}
branch_nodes.push(Promise.resolve(
is_suffixes_only ?
new SuffixSearchTree(
@ -2379,13 +2445,28 @@ function loadDatabase(hooks) {
(leaves_count - 1) & 0xff,
((leaves_count - 1) >> 8) & 0xff,
);
leaves.containers = [
new RoaringBitmapArray(
leaves_count,
encoded.subarray(i, i + (leaves_count * 2)),
),
];
i += leaves_count * 2;
if (leaves_is_run) {
leaves.containers = [
new RoaringBitmapRun(
1,
Uint8Array.of(
encoded[i],
encoded[i + 1],
leaves_count - 1,
0,
),
),
];
i += 2;
} else {
leaves.containers = [
new RoaringBitmapArray(
leaves_count,
encoded.subarray(i, i + (leaves_count * 2)),
),
];
i += leaves_count * 2;
}
}
return is_suffixes_only ?
new SuffixSearchTree(
@ -2654,7 +2735,7 @@ function loadDatabase(hooks) {
/**
* @param {string} inputBase64
* @returns {[Uint8Array<ArrayBuffer>, SearchTree]}
* @returns {[Uint8Array, SearchTree]}
*/
function makeSearchTreeFromBase64(inputBase64) {
const input = makeUint8ArrayFromBase64(inputBase64);
@ -2972,7 +3053,10 @@ function loadDatabase(hooks) {
// node with packed leaves and common 16bit prefix
const leaves_count = no_leaves_flag !== 0 ?
0 :
((compression_tag >> 4) & 0x0f) + 1;
((compression_tag >> 4) & 0x07) + 1;
const leaves_is_run = no_leaves_flag !== 0 ?
false :
((compression_tag >> 4) & 0x08) !== 0;
const branch_count = is_long_compressed ?
((compression_tag >> 8) & 0xff) + 1 :
0;
@ -2994,16 +3078,25 @@ function loadDatabase(hooks) {
for (let j = 0; j < branch_count; j += 1) {
const branch_dlen = input[i] & 0x0f;
const branch_leaves_count = ((input[i] >> 4) & 0x0f) + 1;
const branch_leaves_is_run = (input[i] >> 7) !== 0;
i += 1;
if (!is_pure_suffixes_only_node) {
i += branch_dlen;
}
i += branch_leaves_count * 2;
if (branch_leaves_is_run) {
i += 2;
} else {
i += branch_leaves_count * 2;
}
}
// branch keys
i += branch_count;
// leaves
i += leaves_count * 2;
if (leaves_is_run) {
i += 2;
} else {
i += leaves_count * 2;
}
if (is_data_compressed) {
const clen = (
1 + // first compression header byte
@ -3305,7 +3398,7 @@ if (typeof window !== "undefined") {
// eslint-disable-next-line max-len
// polyfill https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array/fromBase64
/**
* @type {function(string): Uint8Array<ArrayBuffer>} base64
* @type {function(string): Uint8Array} base64
*/
//@ts-expect-error
const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 : (string => {
@ -3318,7 +3411,7 @@ const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 :
return bytes;
});
/**
* @type {function(string): Uint8Array<ArrayBuffer>} base64
* @type {function(string): Uint8Array} base64
*/
//@ts-expect-error
const makeUint8ArrayFromHex = Uint8Array.fromHex ? Uint8Array.fromHex : (string => {