From 5f92951d4ff91085ae7af62b2abf43fffde8b35d Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sat, 3 Apr 2021 19:08:14 -0700 Subject: [PATCH 1/2] rustdoc: sort search index items for compression This should not affect the appearance of the docs pages themselves. This makes the pre-compressed search index smaller, thanks to the empty-string path duplication format, and also the gzipped version, by giving the algorithm more structure to work with. rust$ wc -c search-index-old.js search-index-new.js 2628334 search-index-old.js 2586181 search-index-new.js 5214515 total rust$ gzip search-index-* rust$ wc -c search-index-old.js.gz search-index-new.js.gz 239486 search-index-old.js.gz 237386 search-index-new.js.gz 476872 total --- src/librustdoc/clean/types.rs | 4 ++-- src/librustdoc/formats/cache.rs | 9 +-------- src/librustdoc/html/render/cache.rs | 23 ++++++++++++++++------- src/librustdoc/html/render/mod.rs | 1 + 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index 4132e187c72a..49a91240b6ac 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -914,7 +914,7 @@ impl Attributes { .collect() } - crate fn get_doc_aliases(&self) -> FxHashSet<String> { + crate fn get_doc_aliases(&self) -> Box<[String]> { let mut aliases = FxHashSet::default(); for attr in self.other_attrs.lists(sym::doc).filter(|a| a.has_name(sym::alias)) { @@ -931,7 +931,7 @@ impl Attributes { aliases.insert(attr.value_str().map(|s| s.to_string()).unwrap()); } } - aliases + aliases.into_iter().collect::<Vec<String>>().into() } } diff --git a/src/librustdoc/formats/cache.rs b/src/librustdoc/formats/cache.rs index 01bceb1d910c..7100cc87b0cc 100644 --- a/src/librustdoc/formats/cache.rs +++ b/src/librustdoc/formats/cache.rs @@ -309,15 +309,8 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> { parent, parent_idx: None, search_type: get_index_search_type(&item, &self.empty_cache, self.tcx), + aliases: 
item.attrs.get_doc_aliases(), }); - - for alias in item.attrs.get_doc_aliases() { - self.cache - .aliases - .entry(alias.to_lowercase()) - .or_insert(Vec::new()) - .push(self.cache.search_index.len() - 1); - } } } (Some(parent), None) if is_inherent_impl_item => { diff --git a/src/librustdoc/html/render/cache.rs b/src/librustdoc/html/render/cache.rs index 5d49a4947276..022afee3105c 100644 --- a/src/librustdoc/html/render/cache.rs +++ b/src/librustdoc/html/render/cache.rs @@ -82,19 +82,28 @@ crate fn build_index<'tcx>(krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt< parent: Some(did), parent_idx: None, search_type: get_index_search_type(&item, cache, tcx), + aliases: item.attrs.get_doc_aliases(), }); - for alias in item.attrs.get_doc_aliases() { - cache - .aliases - .entry(alias.to_lowercase()) - .or_insert(Vec::new()) - .push(cache.search_index.len() - 1); - } } } let Cache { ref mut search_index, ref paths, ref mut aliases, .. } = *cache; + // Sort search index items. This improves the compressibility of the search index. + search_index.sort_unstable_by(|k1, k2| { + // `sort_unstable_by_key` produces lifetime errors + let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent); + let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent); + std::cmp::Ord::cmp(&k1, &k2) + }); + + // Set up alias indexes. + for (i, item) in search_index.iter().enumerate() { + for alias in &item.aliases[..] { + aliases.entry(alias.to_lowercase()).or_insert(Vec::new()).push(i); + } + } + // Reduce `DefId` in paths into smaller sequential numbers, // and prune the paths that do not appear in the index. 
let mut lastpath = String::new(); diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 07bd26a4c5eb..cb5ca5b5e641 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -164,6 +164,7 @@ crate struct IndexItem { crate parent: Option<DefId>, crate parent_idx: Option<usize>, crate search_type: Option<IndexItemFunctionType>, + crate aliases: Box<[String]>, } /// A type used for the search index. From 2370e3b439aa01982c33bbfe9823337a6231207f Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Sun, 4 Apr 2021 13:08:17 -0700 Subject: [PATCH 2/2] Get rid of unneeded `aliases` field --- src/librustdoc/formats/cache.rs | 4 ---- src/librustdoc/html/render/cache.rs | 8 ++++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/librustdoc/formats/cache.rs b/src/librustdoc/formats/cache.rs index 7100cc87b0cc..3e17db7fa7f2 100644 --- a/src/librustdoc/formats/cache.rs +++ b/src/librustdoc/formats/cache.rs @@ -120,10 +120,6 @@ crate struct Cache { // when gathering trait documentation on a type, hold impls here while // folding and add them to the cache later on if we find the trait. orphan_trait_impls: Vec<(DefId, FxHashSet<DefId>, Impl)>, - - /// Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias, - /// we need the alias element to have an array of items. - crate aliases: BTreeMap<String, Vec<usize>>, } /// This struct is used to wrap the `cache` and `tcx` in order to run `DocFolder`. diff --git a/src/librustdoc/html/render/cache.rs b/src/librustdoc/html/render/cache.rs index 022afee3105c..2265905dcbaf 100644 --- a/src/librustdoc/html/render/cache.rs +++ b/src/librustdoc/html/render/cache.rs @@ -87,7 +87,11 @@ crate fn build_index<'tcx>(krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt< } } - let Cache { ref mut search_index, ref paths, ref mut aliases, .. } = *cache; + let Cache { ref mut search_index, ref paths, .. } = *cache; + + // Aliases added through `#[doc(alias = "...")]`. 
Since a few items can have the same alias, + // we need the alias element to have an array of items. + let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new(); // Sort search index items. This improves the compressibility of the search index. search_index.sort_unstable_by(|k1, k2| { @@ -210,7 +214,7 @@ crate fn build_index<'tcx>(krate: &clean::Crate, cache: &mut Cache, tcx: TyCtxt< doc: crate_doc, items: crate_items, paths: crate_paths, - aliases, + aliases: &aliases, }) .expect("failed serde conversion") // All these `replace` calls are because we have to go through JS string for JSON content.