Rollup merge of #133981 - aDotInTheVoid:document-docs-ids, r=fmease

rustdoc-json: Refactor and document Id's

Closes #133780

While working on documenting Id's, I realized that a lot of the way they were generated was weird and unnecessary. E.g.:

1. The fully uninterned id type was `(FullItemId, Option<FullItemId>)`, meaning it wasn't actually full!
2. None of the extra fields in `Option<FullItemId>` would ever be used
3. `imported_item_id` was a `rustdoc_json_types::Id` instead of a simpler `DefId`.

I believe the new implementation still covers all the same cases, but in a more principled way (and explaining why each piece is needed).

This was written to be reviewed commit-by-commit, but it might be easier to review all at once if you're not interested in tracking how the original code became the final code.

cc ``@its-the-shrimp``

r? ``@fmease``
This commit is contained in:
Matthias Krüger 2025-03-13 10:58:15 +01:00 committed by GitHub
commit b3ab69504a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 126 additions and 74 deletions

View file

@ -7,14 +7,13 @@
use rustc_abi::ExternAbi;
use rustc_ast::ast;
use rustc_attr_parsing::DeprecatedSince;
use rustc_hir::def::{CtorKind, DefKind};
use rustc_hir::def::CtorKind;
use rustc_hir::def_id::DefId;
use rustc_metadata::rendered_const;
use rustc_middle::{bug, ty};
use rustc_span::{Pos, Symbol, sym};
use rustc_span::{Pos, Symbol};
use rustdoc_json_types::*;
use super::FullItemId;
use crate::clean::{self, ItemId};
use crate::formats::FormatRenderer;
use crate::formats::item_type::ItemType;
@ -108,67 +107,6 @@ impl JsonRenderer<'_> {
}
}
/// Interns `item_id` without supplying an explicit name or an extra id.
///
/// Thin wrapper around `id_from_item_inner` that passes `None` for both.
pub(crate) fn id_from_item_default(&self, item_id: ItemId) -> Id {
self.id_from_item_inner(item_id, None, None)
}
/// Builds the uninterned key for `item_id` and returns the `Id` interned for it.
///
/// The key is a pair `(FullItemId, Option<FullItemId>)`:
/// - `ItemId::DefId`: the first part is built from the `DefId` together with
///   `name` and `extra`; the second part is `None`.
/// - `ItemId::Blanket` / `ItemId::Auto`: the first part is built from
///   `impl_id` / `trait_` with no name override and no extra, and the second
///   part is built from `for_` together with `name` and `extra`.
///
/// A key that has not been seen before is assigned the interner's current
/// length as its id. Panics with "too many items in a crate" if that length
/// cannot be converted into the integer wrapped by `Id`.
pub(crate) fn id_from_item_inner(
&self,
item_id: ItemId,
name: Option<Symbol>,
extra: Option<Id>,
) -> Id {
// Builds one half of the key; an explicitly supplied `name` always wins over
// the name looked up from `def_id`.
let make_part = |def_id: DefId, name: Option<Symbol>, extra: Option<Id>| {
let name = match name {
Some(name) => Some(name),
None => {
// We need this workaround because primitive types' DefId actually refers to
// their parent module, which isn't present in the output JSON items. So
// instead, we directly get the primitive symbol
if matches!(self.tcx.def_kind(def_id), DefKind::Mod)
&& let Some(prim) = self
.tcx
.get_attrs(def_id, sym::rustc_doc_primitive)
.find_map(|attr| attr.value_str())
{
Some(prim)
} else {
self.tcx.opt_item_name(def_id)
}
}
};
FullItemId { def_id, name, extra }
};
let key = match item_id {
ItemId::DefId(did) => (make_part(did, name, extra), None),
ItemId::Blanket { for_, impl_id } => {
(make_part(impl_id, None, None), Some(make_part(for_, name, extra)))
}
ItemId::Auto { for_, trait_ } => {
(make_part(trait_, None, None), Some(make_part(for_, name, extra)))
}
};
let mut interner = self.id_interner.borrow_mut();
// Read the length before taking the entry: it becomes the id of a new key.
let len = interner.len();
*interner
.entry(key)
.or_insert_with(|| Id(len.try_into().expect("too many items in a crate")))
}
/// Returns the interned `Id` for a cleaned `item`, keyed on its `item_id` and `name`.
///
/// For import items, the `DefId` of the import's source (when there is one) is
/// itself interned via `id_from_item_default` and passed along as the `extra`
/// part of the key. Every other item kind passes no extra.
pub(crate) fn id_from_item(&self, item: &clean::Item) -> Id {
match item.kind {
clean::ItemKind::ImportItem(ref import) => {
let extra =
import.source.did.map(ItemId::from).map(|i| self.id_from_item_default(i));
self.id_from_item_inner(item.item_id, item.name, extra)
}
_ => self.id_from_item_inner(item.item_id, item.name, None),
}
}
fn ids(&self, items: impl IntoIterator<Item = clean::Item>) -> Vec<Id> {
items
.into_iter()

122
src/librustdoc/json/ids.rs Normal file
View file

@ -0,0 +1,122 @@
//! Id handling for rustdoc-json.
//!
//! Manages the creation of [`rustdoc_json_types::Id`] and the
//! fact that these don't correspond exactly to [`DefId`], because
//! [`rustdoc_json_types::Item`] doesn't correspond exactly to what
//! other phases think of as an "item".
use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::DefId;
use rustc_span::{Symbol, sym};
use rustdoc_json_types as types;
use super::JsonRenderer;
use crate::clean;
/// Maps each uninterned [`FullItemId`] to the [`types::Id`] that was assigned to it.
pub(super) type IdInterner = FxHashMap<FullItemId, types::Id>;
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
/// An uninterned id.
///
/// Each one corresponds to exactly one of each of the following:
/// 1. A [`rustdoc_json_types::Item`].
/// 2. A [`rustdoc_json_types::Id`], transitively (as each `Item` has an `Id`).
///
/// It's *broadly* equivalent to a [`DefId`], but needs slightly more information
/// to fully disambiguate items, because sometimes we choose to split a single HIR
/// item into multiple JSON items, or have items with no corresponding HIR item.
pub(super) struct FullItemId {
/// The "main" id of the item.
///
/// In most cases this uniquely identifies the item; the other fields are just
/// used for edge cases.
///
/// For synthesized blanket and auto-trait impls, this is the `for_` id of the
/// [`clean::ItemId`].
def_id: DefId,
/// An extra [`DefId`], which we need for:
///
/// 1. Auto-trait impls synthesized by rustdoc (holds the `trait_` id).
/// 2. Blanket impls synthesized by rustdoc (holds the `impl_id`).
/// 3. Splitting of reexports of multiple items (holds the id of the imported
///    item).
///
/// E.g:
///
/// ```rust
/// mod module {
/// pub struct Foo {} // Exists in type namespace
/// pub fn Foo(){} // Exists in value namespace
/// }
///
/// pub use module::Foo; // Imports both items
/// ```
///
/// In HIR, the `pub use` is just 1 item, but in rustdoc-json it's 2, so
/// we need to disambiguate.
extra_id: Option<DefId>,
/// Needed for `#[rustc_doc_primitive]` modules.
///
/// For these, 1 [`DefId`] is used for both the primitive and the fake-module
/// that holds its docs.
///
/// When no name is supplied by the caller, this is the value of the
/// `#[rustc_doc_primitive]` attribute if `def_id` is such a module, and
/// otherwise whatever `opt_item_name` returns for `def_id`.
///
/// N.B. This only matters when documenting the standard library with
/// `--document-private-items`. Maybe we should delete that module, and
/// remove this.
name: Option<Symbol>,
}
impl JsonRenderer<'_> {
    /// Interns `item_id` with no explicit name and no imported-item id.
    pub(crate) fn id_from_item_default(&self, item_id: clean::ItemId) -> types::Id {
        self.id_from_item_inner(item_id, None, None)
    }

    /// Assembles the [`FullItemId`] for `item_id` and returns the [`types::Id`]
    /// interned for it.
    ///
    /// `imported_id` only takes part in the key for plain `ItemId::DefId` items;
    /// blanket and auto-trait impls use their `impl_id` / `trait_` as the extra
    /// id instead. A key seen for the first time is assigned the interner's
    /// current length as its id.
    ///
    /// # Panics
    ///
    /// Panics with "too many items in a crate" if the interner's length cannot
    /// be converted into the integer wrapped by [`types::Id`].
    fn id_from_item_inner(
        &self,
        item_id: clean::ItemId,
        name: Option<Symbol>,
        imported_id: Option<DefId>,
    ) -> types::Id {
        let (def_id, extra_id) = match item_id {
            clean::ItemId::DefId(did) => (did, imported_id),
            clean::ItemId::Blanket { for_, impl_id } => (for_, Some(impl_id)),
            clean::ItemId::Auto { for_, trait_ } => (for_, Some(trait_)),
        };

        // An explicitly supplied name always wins; otherwise derive one from `def_id`.
        let name = name.or_else(|| {
            // Primitive types' DefId actually refers to their parent module, which
            // isn't present in the output JSON items, so for such modules we take
            // the primitive symbol straight from the attribute.
            let primitive = if matches!(self.tcx.def_kind(def_id), DefKind::Mod) {
                self.tcx
                    .get_attrs(def_id, sym::rustc_doc_primitive)
                    .find_map(|attr| attr.value_str())
            } else {
                None
            };
            primitive.or_else(|| self.tcx.opt_item_name(def_id))
        });

        let key = FullItemId { def_id, extra_id, name };

        let mut interner = self.id_interner.borrow_mut();
        // Captured before taking the entry: this is the id handed to a new key.
        let next_index = interner.len();
        let id = *interner.entry(key).or_insert_with(|| {
            types::Id(next_index.try_into().expect("too many items in a crate"))
        });
        id
    }

    /// Returns the interned [`types::Id`] for a cleaned `item`.
    ///
    /// Import items additionally contribute the `DefId` of their source (if any)
    /// to the key; every other item kind contributes nothing extra.
    pub(crate) fn id_from_item(&self, item: &clean::Item) -> types::Id {
        let imported_id = match item.kind {
            clean::ItemKind::ImportItem(ref import) => import.source.did,
            _ => None,
        };
        self.id_from_item_inner(item.item_id, item.name, imported_id)
    }
}

View file

@ -5,6 +5,7 @@
//! docs for usage and details.
mod conversions;
mod ids;
mod import_finder;
use std::cell::RefCell;
@ -16,7 +17,6 @@ use std::rc::Rc;
use rustc_hir::def_id::{DefId, DefIdSet};
use rustc_middle::ty::TyCtxt;
use rustc_session::Session;
use rustc_span::Symbol;
use rustc_span::def_id::LOCAL_CRATE;
use rustdoc_json_types as types;
// It's important to use the FxHashMap from rustdoc_json_types here, instead of
@ -35,14 +35,6 @@ use crate::formats::cache::Cache;
use crate::json::conversions::IntoJson;
use crate::{clean, try_err};
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
/// One part of the uninterned key used to look up a `types::Id` in the
/// renderer's `id_interner`.
struct FullItemId {
/// The `DefId` this part of the key is built from.
def_id: DefId,
/// Name stored alongside `def_id` in the key.
name: Option<Symbol>,
/// Used to distinguish imports of different items with the same name
extra: Option<types::Id>,
}
#[derive(Clone)]
pub(crate) struct JsonRenderer<'tcx> {
tcx: TyCtxt<'tcx>,
@ -55,7 +47,7 @@ pub(crate) struct JsonRenderer<'tcx> {
out_dir: Option<PathBuf>,
cache: Rc<Cache>,
imported_items: DefIdSet,
id_interner: Rc<RefCell<FxHashMap<(FullItemId, Option<FullItemId>), types::Id>>>,
id_interner: Rc<RefCell<ids::IdInterner>>,
}
impl<'tcx> JsonRenderer<'tcx> {