rustdoc-search: yet another stringdex optimization attempt

This one uses a different tactic. It shouldn't significantly increase the amount of downloaded index data, but it still reduces the amount of disk usage. It works by changing the suffix-only node representation to omit some data that's needed for checking a match. Since those nodes make up the bulk of the tree, this reduces the data they store, but it also requires validating the match by fetching the name itself (the names list is pretty small, though, and when I tried it with wordnet "indexing" it was about the same).
2025-08-26 19:46:50 -07:00 · 2025-08-26 19:46:50 -07:00 · 80e18051cb
commit 80e18051cb
parent 5ab69249f3
5 changed files with 648 additions and 165 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -5225,9 +5225,9 @@ dependencies = [

 [[package]]
 name = "stringdex"
-version = "0.0.1-alpha4"
+version = "0.0.1-alpha9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2841fd43df5b1ff1b042e167068a1fe9b163dc93041eae56ab2296859013a9a0"
+checksum = "7081029913fd7d591c0112182aba8c98ae886b4f12edb208130496cd17dc3c15"
 dependencies = [
 "stacker",
 ]
--- a/src/librustdoc/Cargo.toml
+++ b/src/librustdoc/Cargo.toml
@ -21,7 +21,7 @@ rustdoc-json-types = { path = "../rustdoc-json-types" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 smallvec = "1.8.1"
-stringdex = { version = "0.0.1-alpha4" }
+stringdex = { version = "0.0.1-alpha9" }
 tempfile = "3"
 threadpool = "1.8.1"
 tracing = "0.1"
--- a/src/librustdoc/html/static/js/search.js
+++ b/src/librustdoc/html/static/js/search.js
@ -1211,7 +1211,7 @@ class DocSearch {
     * will never fulfill.
     */
    async buildIndex() {
-        const nn = this.database.getIndex("normalizedName");
+        const nn = this.database.getData("normalizedName");
        if (!nn) {
            return;
        }
@ -3706,7 +3706,7 @@ class DocSearch {
             * @returns {AsyncGenerator<rustdoc.ResultObject>}
             */
            async function*(currentCrate) {
-                const index = this.database.getIndex("normalizedName");
+                const index = this.database.getData("normalizedName");
                if (!index) {
                    return;
                }
@ -3835,8 +3835,7 @@ class DocSearch {
                };
                if (elem.normalizedPathLast === "") {
                    // faster full-table scan for this specific case.
-                    const nameData = this.database.getData("name");
-                    const l = nameData ? nameData.length : 0;
+                    const l = index.length;
                    for (let id = 0; id < l; ++id) {
                        if (!idDuplicates.has(id)) {
                            idDuplicates.add(id);
@ -3938,7 +3937,7 @@ class DocSearch {
             * @returns {AsyncGenerator<rustdoc.ResultObject>}
             */
            async function*(inputs, output, typeInfo, currentCrate) {
-                const index = this.database.getIndex("normalizedName");
+                const index = this.database.getData("normalizedName");
                if (!index) {
                    return;
                }
--- a/src/librustdoc/html/static/js/stringdex.d.ts
+++ b/src/librustdoc/html/static/js/stringdex.d.ts
@ -5,17 +5,8 @@ declare namespace stringdex {
     * The client interface to Stringdex.
     */
    interface Database {
-        getIndex(colname: string): SearchTree|undefined;
        getData(colname: string): DataColumn|undefined;
    }
-    /**
-     * A search index file.
-     */
-    interface SearchTree {
-        trie(): Trie;
-        search(name: Uint8Array|string): Promise<Trie?>;
-        searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
-    }
    /**
     * A compressed node in the search tree.
     *
@ -29,9 +20,7 @@ declare namespace stringdex {
        matches(): RoaringBitmap;
        substringMatches(): AsyncGenerator<RoaringBitmap>;
        prefixMatches(): AsyncGenerator<RoaringBitmap>;
-        keys(): Uint8Array;
        keysExcludeSuffixOnly(): Uint8Array;
-        children(): [number, Promise<Trie>][];
        childrenExcludeSuffixOnly(): [number, Promise<Trie>][];
        child(id: number): Promise<Trie>?;
    }
@ -41,6 +30,8 @@ declare namespace stringdex {
    interface DataColumn {
        isEmpty(id: number): boolean;
        at(id: number): Promise<Uint8Array|undefined>;
+        search(name: Uint8Array|string): Promise<Trie?>;
+        searchLev(name: Uint8Array|string): AsyncGenerator<Trie>;
        length: number,
    }
    /**
--- a/src/librustdoc/html/static/js/stringdex.js
+++ b/src/librustdoc/html/static/js/stringdex.js