rustdoc-search: yet another stringdex optimization attempt
This one uses a different tactic. It shouldn't significantly increase the amount of downloaded index data, but it still reduces disk usage. It works by changing the suffix-only node representation to omit some data that's needed for checking. Since those nodes make up the bulk of the tree, this reduces the data they store, but it also requires validating each match by fetching the name itself (the names list is pretty small, though, and when I tried it with wordnet "indexing" it was about the same).
This commit is contained in:
parent
5ab69249f3
commit
80e18051cb
5 changed files with 648 additions and 165 deletions
|
|
@ -5225,9 +5225,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "stringdex"
|
||||
version = "0.0.1-alpha4"
|
||||
version = "0.0.1-alpha9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2841fd43df5b1ff1b042e167068a1fe9b163dc93041eae56ab2296859013a9a0"
|
||||
checksum = "7081029913fd7d591c0112182aba8c98ae886b4f12edb208130496cd17dc3c15"
|
||||
dependencies = [
|
||||
"stacker",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
|
|||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
smallvec = "1.8.1"
|
||||
stringdex = { version = "0.0.1-alpha4" }
|
||||
stringdex = { version = "0.0.1-alpha9" }
|
||||
tempfile = "3"
|
||||
threadpool = "1.8.1"
|
||||
tracing = "0.1"
|
||||
|
|
|
|||
|
|
@ -1211,7 +1211,7 @@ class DocSearch {
|
|||
* will never fulfill.
|
||||
*/
|
||||
async buildIndex() {
|
||||
const nn = this.database.getIndex("normalizedName");
|
||||
const nn = this.database.getData("normalizedName");
|
||||
if (!nn) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -3706,7 +3706,7 @@ class DocSearch {
|
|||
* @returns {AsyncGenerator<rustdoc.ResultObject>}
|
||||
*/
|
||||
async function*(currentCrate) {
|
||||
const index = this.database.getIndex("normalizedName");
|
||||
const index = this.database.getData("normalizedName");
|
||||
if (!index) {
|
||||
return;
|
||||
}
|
||||
|
|
@ -3835,8 +3835,7 @@ class DocSearch {
|
|||
};
|
||||
if (elem.normalizedPathLast === "") {
|
||||
// faster full-table scan for this specific case.
|
||||
const nameData = this.database.getData("name");
|
||||
const l = nameData ? nameData.length : 0;
|
||||
const l = index.length;
|
||||
for (let id = 0; id < l; ++id) {
|
||||
if (!idDuplicates.has(id)) {
|
||||
idDuplicates.add(id);
|
||||
|
|
@ -3938,7 +3937,7 @@ class DocSearch {
|
|||
* @returns {AsyncGenerator<rustdoc.ResultObject>}
|
||||
*/
|
||||
async function*(inputs, output, typeInfo, currentCrate) {
|
||||
const index = this.database.getIndex("normalizedName");
|
||||
const index = this.database.getData("normalizedName");
|
||||
if (!index) {
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
13
src/librustdoc/html/static/js/stringdex.d.ts
vendored
13
src/librustdoc/html/static/js/stringdex.d.ts
vendored
|
|
@ -5,17 +5,8 @@ declare namespace stringdex {
|
|||
* The client interface to Stringdex.
|
||||
*/
|
||||
interface Database {
|
||||
getIndex(colname: string): SearchTree|undefined;
|
||||
getData(colname: string): DataColumn|undefined;
|
||||
}
|
||||
/**
|
||||
* A search index file.
|
||||
*/
|
||||
interface SearchTree {
|
||||
trie(): Trie;
|
||||
search(name: Uint8Array|string): Promise<Trie?>;
|
||||
searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
|
||||
}
|
||||
/**
|
||||
* A compressed node in the search tree.
|
||||
*
|
||||
|
|
@ -29,9 +20,7 @@ declare namespace stringdex {
|
|||
matches(): RoaringBitmap;
|
||||
substringMatches(): AsyncGenerator<RoaringBitmap>;
|
||||
prefixMatches(): AsyncGenerator<RoaringBitmap>;
|
||||
keys(): Uint8Array;
|
||||
keysExcludeSuffixOnly(): Uint8Array;
|
||||
children(): [number, Promise<Trie>][];
|
||||
childrenExcludeSuffixOnly(): [number, Promise<Trie>][];
|
||||
child(id: number): Promise<Trie>?;
|
||||
}
|
||||
|
|
@ -41,6 +30,8 @@ declare namespace stringdex {
|
|||
interface DataColumn {
|
||||
isEmpty(id: number): boolean;
|
||||
at(id: number): Promise<Uint8Array|undefined>;
|
||||
search(name: Uint8Array|string): Promise<Trie?>;
|
||||
searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
|
||||
length: number,
|
||||
}
|
||||
/**
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue