Rollup merge of #149043 - aDotInTheVoid:is-this-real-is-this-out-of-spite-does-it-matter, r=GuillaumeGomez

rustdoc-json: add rlib path to ExternalCrate to enable robust crate resolution

Historically, it's not been possible to robustly resolve a cross-crate item in rustdoc-json. If you had an `Id` that wasn't in `Crate::index` (because it was defined in a different crate), you could only look it up in `Crate::paths`. But there, you don't get the full information, only an `ItemSummary`. This tells you the `path` and the `crate_id`.

But knowing the `crate_id` isn't enough to be able to build/find the rustdoc-json output with this item. Its only use is to get an `ExternalCrate` (via `Crate::external_crates`). But that only tells you the `name` (as a string). This isn't enough to uniquely identify a crate, as there could be multiple versions/features [^1] [^2].

This was originally proposed to be solved via `@LukeMathWalker's` `--orchestrator-id` proposal (https://github.com/rust-lang/compiler-team/issues/635). But that requires invasive changes to cargo/rustc. This PR instead implements `@Urgau's` proposal to re-use the path to a crate's rmeta/rlib as a unique identifier. Callers can use that to determine which package it corresponds to in the language of the build-system above rustc (e.g. for cargo, via `cargo rustdoc --message-format=json --output-format=json -Zunstable-options`).

(Once you've found the right external crate's rustdoc-json output, you still need to resolve the path->id in that crate. But that's """just""" a matter of walking the module tree. We should probably still make that nicer (by, for example, allowing sharing `Id`s between rustdoc-json documents), but that's a future concern.)

For some notes from RustWeek 2025, where this was designed, see https://hackmd.io/0jkdguobTnW7nXoGKAxfEQ

CC `@obi1kenobi` (who wants this for cargo-semver-checks [^3]), `@epage` (whose conversations on what was and wasn't possible with cargo informed taking this approach to solve this problem)

r? `@GuillaumeGomez`

## TODO:

- [x] Docs: [Done](e4cdd0c24a..457ed4edb1)
- [x] Tests: [Done](2e1b954dc5..4d00c1a7ee)

[^1]: https://github.com/rust-lang/compiler-team/issues/635#issue-1714254865 § Problem
[^2]: https://rust-lang.zulipchat.com/#narrow/channel/266220-t-rustdoc/topic/Identifying.20external.20crates.20in.20Rustdoc.20JSON/with/352701211
[^3]: https://github.com/obi1kenobi/cargo-semver-checks/issues/638
This commit is contained in:
Guillaume Gomez 2025-11-21 21:34:26 +01:00 committed by GitHub
commit 629a283b98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 124 additions and 6 deletions

View file

@ -3336,6 +3336,7 @@ dependencies = [
"libc",
"object 0.37.3",
"regex",
"rustdoc-json-types",
"serde_json",
"similar",
"wasmparser 0.236.1",

View file

@ -302,6 +302,13 @@ impl<'tcx> FormatRenderer<'tcx> for JsonRenderer<'tcx> {
ExternalLocation::Remote(s) => Some(s.clone()),
_ => None,
},
path: self
.tcx
.used_crate_source(*crate_num)
.paths()
.next()
.expect("crate should have at least 1 path")
.clone(),
},
)
})
@ -339,15 +346,12 @@ mod size_asserts {
// tidy-alphabetical-start
static_assert_size!(AssocItemConstraint, 112);
static_assert_size!(Crate, 184);
static_assert_size!(ExternalCrate, 48);
static_assert_size!(FunctionPointer, 168);
static_assert_size!(GenericArg, 80);
static_assert_size!(GenericArgs, 104);
static_assert_size!(GenericBound, 72);
static_assert_size!(GenericParamDef, 136);
static_assert_size!(Impl, 304);
// `Item` contains a `PathBuf`, which is different sizes on different OSes.
static_assert_size!(Item, 528 + size_of::<std::path::PathBuf>());
static_assert_size!(ItemSummary, 32);
static_assert_size!(PolyTrait, 64);
static_assert_size!(PreciseCapturingArg, 32);
@ -355,4 +359,8 @@ mod size_asserts {
static_assert_size!(Type, 80);
static_assert_size!(WherePredicate, 160);
// tidy-alphabetical-end
// These contain a `PathBuf`, which has a different size on different OSes.
static_assert_size!(Item, 528 + size_of::<std::path::PathBuf>());
static_assert_size!(ExternalCrate, 48 + size_of::<std::path::PathBuf>());
}

View file

@ -37,8 +37,8 @@ pub type FxHashMap<K, V> = HashMap<K, V>; // re-export for use in src/librustdoc
// will instead cause conflicts. See #94591 for more. (This paragraph and the "Latest feature" line
// are deliberately not in a doc comment, because they need not be in public docs.)
//
// Latest feature: Add `ItemKind::Attribute`.
pub const FORMAT_VERSION: u32 = 56;
// Latest feature: Add `ExternalCrate::path`.
pub const FORMAT_VERSION: u32 = 57;
/// The root of the emitted JSON blob.
///
@ -135,6 +135,12 @@ pub struct ExternalCrate {
pub name: String,
/// The root URL at which the crate's documentation lives.
pub html_root_url: Option<String>,
/// A path from where this crate was loaded.
///
/// This will typically be a `.rlib` or `.rmeta`. It can be used to determine which crate
/// this was in terms of whatever build-system invoked rustc.
pub path: PathBuf,
}
/// Information about an external (not defined in the local crate) [`Item`].

View file

@ -22,6 +22,8 @@ wasmparser = { version = "0.236", default-features = false, features = ["std", "
# Shared with bootstrap and compiletest
build_helper = { path = "../../build_helper" }
# Shared with rustdoc
rustdoc-json-types = { path = "../../rustdoc-json-types" }
[lib]
crate-type = ["lib", "dylib"]

View file

@ -34,7 +34,7 @@ pub mod rfs {
}
// Re-exports of third-party library crates.
pub use {bstr, gimli, libc, object, regex, serde_json, similar, wasmparser};
pub use {bstr, gimli, libc, object, regex, rustdoc_json_types, serde_json, similar, wasmparser};
// Helpers for building names of output artifacts that are potentially target-specific.
pub use crate::artifact_names::{

View file

@ -0,0 +1,5 @@
// Test fixture crate: defines its own `S` and re-exports the `S` from `trans_dep`.
#![no_std]
// Public unit struct defined directly in this crate.
pub struct S;
// Re-export of `trans_dep::S` under the name `TransDep`, so that users of this crate can
// name a type that is actually defined one crate further away.
pub use trans_dep::S as TransDep;

View file

@ -0,0 +1,4 @@
// Test fixture crate: exposes two type aliases that both resolve to types reached via `dep`.
#![no_std]
// Alias for the `S` reachable at `dep::S`.
pub type FromDep = dep::S;
// Alias for `dep::TransDep`.
// NOTE(review): presumably this is the re-export of `trans_dep::S` — confirm against `dep`.
pub type FromTransDep = dep::TransDep;

View file

@ -0,0 +1,89 @@
use std::path;
use run_make_support::rustdoc_json_types::{Crate, ItemEnum, Path, Type, TypeAlias};
use run_make_support::{cwd, rfs, rust_lib_name, rustc, rustdoc, serde_json};
/// Resolves `p` to its canonical form via [`std::fs::canonicalize`].
///
/// # Panics
///
/// Panics (reporting the caller's location, thanks to `#[track_caller]`) if the path cannot
/// be canonicalized.
#[track_caller]
fn canonicalize(p: &path::Path) -> path::PathBuf {
    match std::fs::canonicalize(p) {
        Ok(resolved) => resolved,
        // Same message shape as `Result::expect`: "<msg>: <error Debug>".
        Err(err) => panic!("path should be canonicalizeable: {err:?}"),
    }
}
/// Checks that rustdoc JSON records a usable `ExternalCrate::path` for direct and transitive
/// dependencies.
///
/// Steps:
/// 1. Compile `trans_dep.rs` and `dep.rs` as libraries (`dep` is given `trans_dep` via `--extern`).
/// 2. Document `entry.rs` with `--output-format json`.
/// 3. Follow the `FromDep` / `FromTransDep` type aliases to the `Id`s they resolve to, look
///    those up in `Crate::paths`, and from there in `Crate::external_crates`.
/// 4. Assert that each external crate's `path` canonicalizes to the same file as
///    `cwd()/rust_lib_name(<crate>)`.
fn main() {
    rustc().input("trans_dep.rs").edition("2024").crate_type("lib").run();

    rustc()
        .input("dep.rs")
        .edition("2024")
        .crate_type("lib")
        .extern_("trans_dep", rust_lib_name("trans_dep"))
        .run();

    rustdoc()
        .input("entry.rs")
        .edition("2024")
        .output_format("json")
        .library_search_path(cwd())
        .extern_("dep", rust_lib_name("dep"))
        .arg("-Zunstable-options")
        .run();

    let bytes = rfs::read("doc/entry.json");
    let krate: Crate = serde_json::from_slice(&bytes).expect("output should be valid json");

    let root_item = &krate.index[&krate.root];
    let ItemEnum::Module(root_mod) = &root_item.inner else { panic!("expected ItemEnum::Module") };

    // `entry.rs` is expected to contain exactly the two type aliases inspected below.
    assert_eq!(root_mod.items.len(), 2);

    let items = root_mod.items.iter().map(|id| &krate.index[id]).collect::<Vec<_>>();

    let from_dep = items
        .iter()
        .find(|item| item.name.as_deref() == Some("FromDep"))
        .expect("there should be an item called FromDep");

    let from_trans_dep = items
        .iter()
        .find(|item| item.name.as_deref() == Some("FromTransDep"))
        .expect("there should be an item called FromTransDep");

    let ItemEnum::TypeAlias(TypeAlias {
        type_: Type::ResolvedPath(Path { id: from_dep_id, .. }),
        ..
    }) = &from_dep.inner
    else {
        panic!("Expected FromDep to be a TypeAlias");
    };

    let ItemEnum::TypeAlias(TypeAlias {
        type_: Type::ResolvedPath(Path { id: from_trans_dep_id, .. }),
        ..
    }) = &from_trans_dep.inner
    else {
        panic!("Expected FromTransDep to be a TypeAlias");
    };

    // Both aliased types live in other crates, so they must be absent from the local index
    // and only reachable through `Crate::paths`.
    assert_eq!(krate.index.get(from_dep_id), None);
    assert_eq!(krate.index.get(from_trans_dep_id), None);

    let from_dep_externalinfo = &krate.paths[from_dep_id];
    let from_trans_dep_externalinfo = &krate.paths[from_trans_dep_id];

    let dep_crate_id = from_dep_externalinfo.crate_id;
    let trans_dep_crate_id = from_trans_dep_externalinfo.crate_id;

    let dep = &krate.external_crates[&dep_crate_id];
    let trans_dep = &krate.external_crates[&trans_dep_crate_id];

    assert_eq!(dep.name, "dep");
    assert_eq!(trans_dep.name, "trans_dep");

    // Compare canonicalized paths so that differences in how the path is spelled
    // (relative vs. absolute, symlinks) do not cause spurious failures.
    assert_eq!(canonicalize(&dep.path), canonicalize(&cwd().join(rust_lib_name("dep"))));
    assert_eq!(
        canonicalize(&trans_dep.path),
        canonicalize(&cwd().join(rust_lib_name("trans_dep")))
    );
}

View file

@ -0,0 +1,3 @@
// Test fixture crate: a minimal `no_std` library that only defines `S`.
#![no_std]
// Public unit struct; the only item this crate exports.
pub struct S;