Auto merge of #125166 - lovesegfault:embed-rustc-perf, r=Mark-Simulacrum

refactor: add rustc-perf submodule to src/tools

Currently, it's very challenging to perform a sandboxed `opt-dist`
bootstrap because the tool requires `rustc-perf` to be present, but
there is no proper management/tracking of it. Instead, a specific commit
is hardcoded where it is needed, and a non-checksummed zip is fetched
ad-hoc. This happens in two places:

`src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile`:

```dockerfile
ENV PERF_COMMIT 4f313add609f43e928e98132358e8426ed3969ae
RUN curl -LS -o perf.zip https://ci-mirrors.rust-lang.org/rustc/rustc-perf-$PERF_COMMIT.zip && \
    unzip perf.zip && \
    mv rustc-perf-$PERF_COMMIT rustc-perf && \
    rm perf.zip
```

`src/tools/opt-dist/src/main.rs`:

```rust
// FIXME: add some mechanism for synchronization of this commit SHA with
// Linux (which builds rustc-perf in a Dockerfile)
// rustc-perf version from 2023-10-22
const PERF_COMMIT: &str = "4f313add609f43e928e98132358e8426ed3969ae";

let url = format!("https://ci-mirrors.rust-lang.org/rustc/rustc-perf-{PERF_COMMIT}.zip");
let client = reqwest::blocking::Client::builder()
    .timeout(Duration::from_secs(60 * 2))
    .connect_timeout(Duration::from_secs(60 * 2))
    .build()?;
let response = retry_action(
    || Ok(client.get(&url).send()?.error_for_status()?.bytes()?.to_vec()),
    "Download rustc-perf archive",
    5,
)?;
```

This causes a few issues:

1. Maintainers need to be careful to bump `PERF_COMMIT` in both places
   every time it changes
2. In order to run `opt-dist` in a sandbox, you need to provide your own
   `rustc-perf` (https://github.com/rust-lang/rust/pull/125125), but to
   figure out which commit to provide you need to grep the Dockerfile
3. Even if you manage to provide the correct `rustc-perf`, its
   dependencies are not included in the `vendor/` dir created during
   `dist`, so it will fail to build from the published source tarballs
4. It is hard to provide any level of automation around updating the
   `rustc-perf` in use, leading to staleness

Fundamentally, this means `rustc-src` tarballs no longer contain
everything you need to bootstrap Rust, and packagers hoping to leverage
`opt-dist` need to go out of their way to keep track of this "hidden"
dependency on `rustc-perf`.

This change adds rustc-perf as a git submodule, pinned to the current
`PERF_COMMIT` 4f313add609f43e928e98132358e8426ed3969ae. Subsequent
commits ensure the submodule is initialized when necessary, and make use
of it in `opt-dist`.
This commit is contained in:
bors 2024-05-20 22:36:55 +00:00
commit 60faa271d9
13 changed files with 70 additions and 445 deletions

View file

@ -13,8 +13,6 @@ humansize = "2"
sysinfo = { version = "0.30", default-features = false }
fs_extra = "1"
camino = "1"
reqwest = { version = "0.11", features = ["blocking"] }
zip = { version = "0.6", default-features = false, features = ["deflate"] }
tar = "0.4"
xz = { version = "0.1", package = "xz2" }
serde = { version = "1", features = ["derive"] }

View file

@ -3,10 +3,7 @@ use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
use clap::Parser;
use log::LevelFilter;
use std::io::Cursor;
use std::time::Duration;
use utils::io;
use zip::ZipArchive;
use crate::environment::{Environment, EnvironmentBuilder};
use crate::exec::{cmd, Bootstrap};
@ -17,9 +14,9 @@ use crate::training::{
rustc_benchmarks,
};
use crate::utils::artifact_size::print_binary_sizes;
use crate::utils::io::{copy_directory, move_directory, reset_directory};
use crate::utils::io::{copy_directory, reset_directory};
use crate::utils::{
clear_llvm_files, format_env_variables, print_free_disk_space, retry_action, with_log_group,
clear_llvm_files, format_env_variables, print_free_disk_space, with_log_group,
write_timer_to_summary,
};
@ -69,7 +66,12 @@ enum EnvironmentCmd {
#[arg(long, default_value = "opt-artifacts")]
artifact_dir: Utf8PathBuf,
/// Checkout directory of `rustc-perf`, it will be fetched automatically if unspecified.
/// Checkout directory of `rustc-perf`.
///
/// If unspecified, defaults to the rustc-perf submodule in the rustc checkout dir
/// (`src/tools/rustc-perf`), which should have been initialized when building this tool.
// FIXME: Move update_submodule into build_helper, that way we can also ensure the submodule
// is updated when _running_ opt-dist, rather than building.
#[arg(long)]
rustc_perf_checkout_dir: Option<Utf8PathBuf>,
@ -146,8 +148,6 @@ fn create_environment(args: Args) -> anyhow::Result<(Environment, Vec<String>)>
.host_llvm_dir(Utf8PathBuf::from("/rustroot"))
.artifact_dir(Utf8PathBuf::from("/tmp/tmp-multistage/opt-artifacts"))
.build_dir(checkout_dir.join("obj"))
// /tmp/rustc-perf comes from the x64 dist Dockerfile
.prebuilt_rustc_perf(Some(Utf8PathBuf::from("/tmp/rustc-perf")))
.shared_llvm(true)
.use_bolt(true)
.skipped_tests(vec![
@ -191,9 +191,12 @@ fn execute_pipeline(
) -> anyhow::Result<()> {
reset_directory(&env.artifact_dir())?;
with_log_group("Building rustc-perf", || match env.prebuilt_rustc_perf() {
Some(dir) => copy_rustc_perf(env, &dir),
None => download_rustc_perf(env),
with_log_group("Building rustc-perf", || {
let rustc_perf_checkout_dir = match env.prebuilt_rustc_perf() {
Some(dir) => dir,
None => env.checkout_path().join("src").join("tools").join("rustc-perf"),
};
copy_rustc_perf(env, &rustc_perf_checkout_dir)
})?;
// Stage 1: Build PGO instrumented rustc
@ -409,36 +412,6 @@ fn copy_rustc_perf(env: &Environment, dir: &Utf8Path) -> anyhow::Result<()> {
build_rustc_perf(env)
}
// Download and build rustc-perf into the given environment.
fn download_rustc_perf(env: &Environment) -> anyhow::Result<()> {
reset_directory(&env.rustc_perf_dir())?;
// FIXME: add some mechanism for synchronization of this commit SHA with
// Linux (which builds rustc-perf in a Dockerfile)
// rustc-perf version from 2023-10-22
const PERF_COMMIT: &str = "4f313add609f43e928e98132358e8426ed3969ae";
let url = format!("https://ci-mirrors.rust-lang.org/rustc/rustc-perf-{PERF_COMMIT}.zip");
let client = reqwest::blocking::Client::builder()
.timeout(Duration::from_secs(60 * 2))
.connect_timeout(Duration::from_secs(60 * 2))
.build()?;
let response = retry_action(
|| Ok(client.get(&url).send()?.error_for_status()?.bytes()?.to_vec()),
"Download rustc-perf archive",
5,
)?;
let mut archive = ZipArchive::new(Cursor::new(response))?;
archive.extract(env.rustc_perf_dir())?;
move_directory(
&env.rustc_perf_dir().join(format!("rustc-perf-{PERF_COMMIT}")),
&env.rustc_perf_dir(),
)?;
build_rustc_perf(env)
}
fn build_rustc_perf(env: &Environment) -> anyhow::Result<()> {
cmd(&[env.cargo_stage_0().as_str(), "build", "-p", "collector"])
.workdir(&env.rustc_perf_dir())

1
src/tools/rustc-perf Submodule

@ -0,0 +1 @@
Subproject commit 4f313add609f43e928e98132358e8426ed3969ae

View file

@ -11,5 +11,6 @@ extend-exclude = """(\
src/doc/edition-guide/|\
src/llvm-project/|\
src/doc/embedded-book/|\
src/tools/rustc-perf/|\
library/backtrace/
)"""

View file

@ -26,6 +26,7 @@ extend-exclude = [
"src/llvm-project/",
"src/doc/embedded-book/",
"library/backtrace/",
"src/tools/rustc-perf/",
# Hack: CI runs from a subdirectory under the main checkout
"../src/doc/nomicon/",
"../src/tools/cargo/",
@ -38,4 +39,5 @@ extend-exclude = [
"../src/llvm-project/",
"../src/doc/embedded-book/",
"../library/backtrace/",
"../src/tools/rustc-perf/",
]

View file

@ -81,12 +81,10 @@ const EXCEPTIONS: ExceptionList = &[
("ar_archive_writer", "Apache-2.0 WITH LLVM-exception"), // rustc
("colored", "MPL-2.0"), // rustfmt
("dissimilar", "Apache-2.0"), // rustdoc, rustc_lexer (few tests) via expect-test, (dev deps)
("encoding_rs", "(Apache-2.0 OR MIT) AND BSD-3-Clause"), // opt-dist
("fluent-langneg", "Apache-2.0"), // rustc (fluent translations)
("fortanix-sgx-abi", "MPL-2.0"), // libstd but only for `sgx` target. FIXME: this dependency violates the documentation comment above.
("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot
("mdbook", "MPL-2.0"), // mdbook
("openssl", "Apache-2.0"), // opt-dist
("option-ext", "MPL-2.0"), // cargo-miri (via `directories`)
("rustc_apfloat", "Apache-2.0 WITH LLVM-exception"), // rustc (license is the same as LLVM uses)
("ryu", "Apache-2.0 OR BSL-1.0"), // BSL is not acceptable, but we use it under Apache-2.0 // cargo/... (because of serde)

View file

@ -18,6 +18,7 @@ pub fn filter_dirs(path: &Path) -> bool {
"src/tools/clippy",
"src/tools/miri",
"src/tools/rust-analyzer",
"src/tools/rustc-perf",
"src/tools/rustfmt",
"src/doc/book",
"src/doc/edition-guide",