Auto merge of #150217 - notriddle:stringdex-0.0.4, r=yotamofek,GuillaumeGomez
rustdoc: upgrade to stringdex 0.0.4
- code cleanup
- smaller encoding for runs
- fast path for the common encoding case
This commit is contained in:
commit
a0c97e3255
3 changed files with 118 additions and 26 deletions
|
|
@ -5357,9 +5357,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "stringdex"
|
||||
version = "0.0.3"
|
||||
version = "0.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "556a6126952cb2f5150057c98a77cc6c771027dea2825bf7fa03d3d638b0a4f8"
|
||||
checksum = "c6204af9e1e433f1ef9b6d44475c7089be33c91111d896463b9dfa20464b87f1"
|
||||
dependencies = [
|
||||
"stacker",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
|
|||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
smallvec = "1.8.1"
|
||||
stringdex = "=0.0.3"
|
||||
stringdex = "=0.0.4"
|
||||
tempfile = "3"
|
||||
threadpool = "1.8.1"
|
||||
tracing = "0.1"
|
||||
|
|
|
|||
|
|
@ -54,6 +54,52 @@ class RoaringBitmap {
|
|||
}
|
||||
this.consumed_len_bytes = pspecial - i;
|
||||
return this;
|
||||
} else if (u8array[i] > 0xe0) {
|
||||
// Special representation of tiny sets that are runs
|
||||
const lspecial = u8array[i] & 0x0f;
|
||||
this.keysAndCardinalities = new Uint8Array(lspecial * 4);
|
||||
i += 1;
|
||||
const key = u8array[i + 2] | (u8array[i + 3] << 8);
|
||||
const value = u8array[i] | (u8array[i + 1] << 8);
|
||||
const container = new RoaringBitmapRun(1, new Uint8Array(4));
|
||||
container.array[0] = value & 0xFF;
|
||||
container.array[1] = (value >> 8) & 0xFF;
|
||||
container.array[2] = lspecial - 1;
|
||||
this.containers.push(container);
|
||||
this.keysAndCardinalities[0] = key & 0xFF;
|
||||
this.keysAndCardinalities[1] = (key >> 8) & 0xFF;
|
||||
this.keysAndCardinalities[2] = lspecial - 1;
|
||||
this.consumed_len_bytes = 5;
|
||||
return this;
|
||||
} else if (u8array[i] > 0xd0) {
|
||||
// Special representation of tiny sets that are close together
|
||||
const lspecial = u8array[i] & 0x0f;
|
||||
this.keysAndCardinalities = new Uint8Array(lspecial * 4);
|
||||
let pspecial = i + 1;
|
||||
let key = u8array[pspecial + 2] | (u8array[pspecial + 3] << 8);
|
||||
let value = u8array[pspecial] | (u8array[pspecial + 1] << 8);
|
||||
let entry = (key << 16) | value;
|
||||
let container;
|
||||
container = new RoaringBitmapArray(1, new Uint8Array(4));
|
||||
container.array[0] = value & 0xFF;
|
||||
container.array[1] = (value >> 8) & 0xFF;
|
||||
this.containers.push(container);
|
||||
this.keysAndCardinalities[0] = key;
|
||||
this.keysAndCardinalities[1] = key >> 8;
|
||||
pspecial += 4;
|
||||
for (let ispecial = 1; ispecial < lspecial; ispecial += 1) {
|
||||
entry += u8array[pspecial];
|
||||
value = entry & 0xffff;
|
||||
key = entry >> 16;
|
||||
container = this.addToArrayAt(key);
|
||||
const cardinalityOld = container.cardinality;
|
||||
container.array[cardinalityOld * 2] = value & 0xFF;
|
||||
container.array[(cardinalityOld * 2) + 1] = (value >> 8) & 0xFF;
|
||||
container.cardinality = cardinalityOld + 1;
|
||||
pspecial += 1;
|
||||
}
|
||||
this.consumed_len_bytes = pspecial - i;
|
||||
return this;
|
||||
} else if (u8array[i] < 0x3a) {
|
||||
// Special representation of tiny sets with arbitrary 32-bit integers
|
||||
const lspecial = u8array[i];
|
||||
|
|
@ -2282,7 +2328,7 @@ function loadDatabase(hooks) {
|
|||
*/
|
||||
class InlineNeighborsTree {
|
||||
/**
|
||||
* @param {Uint8Array<ArrayBuffer>} encoded
|
||||
* @param {Uint8Array} encoded
|
||||
* @param {number} start
|
||||
*/
|
||||
constructor(
|
||||
|
|
@ -2301,7 +2347,8 @@ function loadDatabase(hooks) {
|
|||
const has_branches = (encoded[i] & 0x04) !== 0;
|
||||
/** @type {boolean} */
|
||||
const is_suffixes_only = (encoded[i] & 0x01) !== 0;
|
||||
let leaves_count = ((encoded[i] >> 4) & 0x0f) + 1;
|
||||
let leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
|
||||
let leaves_is_run = (encoded[i] >> 7) !== 0;
|
||||
i += 1;
|
||||
let branch_count = 0;
|
||||
if (has_branches) {
|
||||
|
|
@ -2311,8 +2358,10 @@ function loadDatabase(hooks) {
|
|||
const dlen = encoded[i] & 0x3f;
|
||||
if ((encoded[i] & 0x80) !== 0) {
|
||||
leaves_count = 0;
|
||||
leaves_is_run = false;
|
||||
}
|
||||
i += 1;
|
||||
/** @type {Uint8Array} */
|
||||
let data = EMPTY_UINT8;
|
||||
if (!is_suffixes_only && dlen !== 0) {
|
||||
data = encoded.subarray(i, i + dlen);
|
||||
|
|
@ -2324,8 +2373,10 @@ function loadDatabase(hooks) {
|
|||
const branch_nodes = [];
|
||||
for (let j = 0; j < branch_count; j += 1) {
|
||||
const branch_dlen = encoded[i] & 0x0f;
|
||||
const branch_leaves_count = ((encoded[i] >> 4) & 0x0f) + 1;
|
||||
const branch_leaves_count = ((encoded[i] >> 4) & 0x07) + 1;
|
||||
const branch_leaves_is_run = (encoded[i] >> 7) !== 0;
|
||||
i += 1;
|
||||
/** @type {Uint8Array} */
|
||||
let branch_data = EMPTY_UINT8;
|
||||
if (!is_suffixes_only && branch_dlen !== 0) {
|
||||
branch_data = encoded.subarray(i, i + branch_dlen);
|
||||
|
|
@ -2338,13 +2389,28 @@ function loadDatabase(hooks) {
|
|||
(branch_leaves_count - 1) & 0xff,
|
||||
((branch_leaves_count - 1) >> 8) & 0xff,
|
||||
);
|
||||
branch_leaves.containers = [
|
||||
new RoaringBitmapArray(
|
||||
branch_leaves_count,
|
||||
encoded.subarray(i, i + (branch_leaves_count * 2)),
|
||||
),
|
||||
];
|
||||
i += branch_leaves_count * 2;
|
||||
if (branch_leaves_is_run) {
|
||||
branch_leaves.containers = [
|
||||
new RoaringBitmapRun(
|
||||
1,
|
||||
Uint8Array.of(
|
||||
encoded[i],
|
||||
encoded[i + 1],
|
||||
branch_leaves_count - 1,
|
||||
0,
|
||||
),
|
||||
),
|
||||
];
|
||||
i += 2;
|
||||
} else {
|
||||
branch_leaves.containers = [
|
||||
new RoaringBitmapArray(
|
||||
branch_leaves_count,
|
||||
encoded.subarray(i, i + (branch_leaves_count * 2)),
|
||||
),
|
||||
];
|
||||
i += branch_leaves_count * 2;
|
||||
}
|
||||
branch_nodes.push(Promise.resolve(
|
||||
is_suffixes_only ?
|
||||
new SuffixSearchTree(
|
||||
|
|
@ -2379,13 +2445,28 @@ function loadDatabase(hooks) {
|
|||
(leaves_count - 1) & 0xff,
|
||||
((leaves_count - 1) >> 8) & 0xff,
|
||||
);
|
||||
leaves.containers = [
|
||||
new RoaringBitmapArray(
|
||||
leaves_count,
|
||||
encoded.subarray(i, i + (leaves_count * 2)),
|
||||
),
|
||||
];
|
||||
i += leaves_count * 2;
|
||||
if (leaves_is_run) {
|
||||
leaves.containers = [
|
||||
new RoaringBitmapRun(
|
||||
1,
|
||||
Uint8Array.of(
|
||||
encoded[i],
|
||||
encoded[i + 1],
|
||||
leaves_count - 1,
|
||||
0,
|
||||
),
|
||||
),
|
||||
];
|
||||
i += 2;
|
||||
} else {
|
||||
leaves.containers = [
|
||||
new RoaringBitmapArray(
|
||||
leaves_count,
|
||||
encoded.subarray(i, i + (leaves_count * 2)),
|
||||
),
|
||||
];
|
||||
i += leaves_count * 2;
|
||||
}
|
||||
}
|
||||
return is_suffixes_only ?
|
||||
new SuffixSearchTree(
|
||||
|
|
@ -2654,7 +2735,7 @@ function loadDatabase(hooks) {
|
|||
|
||||
/**
|
||||
* @param {string} inputBase64
|
||||
* @returns {[Uint8Array<ArrayBuffer>, SearchTree]}
|
||||
* @returns {[Uint8Array, SearchTree]}
|
||||
*/
|
||||
function makeSearchTreeFromBase64(inputBase64) {
|
||||
const input = makeUint8ArrayFromBase64(inputBase64);
|
||||
|
|
@ -2972,7 +3053,9 @@ function loadDatabase(hooks) {
|
|||
// node with packed leaves and common 16bit prefix
|
||||
const leaves_count = no_leaves_flag !== 0 ?
|
||||
0 :
|
||||
((compression_tag >> 4) & 0x0f) + 1;
|
||||
((compression_tag >> 4) & 0x07) + 1;
|
||||
const leaves_is_run = no_leaves_flag === 0 &&
|
||||
((compression_tag >> 4) & 0x08) !== 0;
|
||||
const branch_count = is_long_compressed ?
|
||||
((compression_tag >> 8) & 0xff) + 1 :
|
||||
0;
|
||||
|
|
@ -2994,16 +3077,25 @@ function loadDatabase(hooks) {
|
|||
for (let j = 0; j < branch_count; j += 1) {
|
||||
const branch_dlen = input[i] & 0x0f;
|
||||
const branch_leaves_count = ((input[i] >> 4) & 0x0f) + 1;
|
||||
const branch_leaves_is_run = (input[i] >> 7) !== 0;
|
||||
i += 1;
|
||||
if (!is_pure_suffixes_only_node) {
|
||||
i += branch_dlen;
|
||||
}
|
||||
i += branch_leaves_count * 2;
|
||||
if (branch_leaves_is_run) {
|
||||
i += 2;
|
||||
} else {
|
||||
i += branch_leaves_count * 2;
|
||||
}
|
||||
}
|
||||
// branch keys
|
||||
i += branch_count;
|
||||
// leaves
|
||||
i += leaves_count * 2;
|
||||
if (leaves_is_run) {
|
||||
i += 2;
|
||||
} else {
|
||||
i += leaves_count * 2;
|
||||
}
|
||||
if (is_data_compressed) {
|
||||
const clen = (
|
||||
1 + // first compression header byte
|
||||
|
|
@ -3305,7 +3397,7 @@ if (typeof window !== "undefined") {
|
|||
// eslint-disable-next-line max-len
|
||||
// polyfill https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Uint8Array/fromBase64
|
||||
/**
|
||||
* @type {function(string): Uint8Array<ArrayBuffer>} base64
|
||||
* @type {function(string): Uint8Array} base64
|
||||
*/
|
||||
//@ts-expect-error
|
||||
const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 : (string => {
|
||||
|
|
@ -3318,7 +3410,7 @@ const makeUint8ArrayFromBase64 = Uint8Array.fromBase64 ? Uint8Array.fromBase64 :
|
|||
return bytes;
|
||||
});
|
||||
/**
|
||||
* @type {function(string): Uint8Array<ArrayBuffer>} base64
|
||||
* @type {function(string): Uint8Array} base64
|
||||
*/
|
||||
//@ts-expect-error
|
||||
const makeUint8ArrayFromHex = Uint8Array.fromHex ? Uint8Array.fromHex : (string => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue