Do not build docs in try builds. This PR adds a new environment variable that the optimized-build Python script checks in order to skip building certain parts of the final `dist` output (mainly docs) in try builds. This reduces the duration of try builds by ~10 minutes.
883 lines · 28 KiB · Python
#!/usr/bin/env python3
|
|
# ignore-tidy-linelength
|
|
|
|
# Compatible with Python 3.6+
|
|
|
|
import contextlib
|
|
import getpass
|
|
import glob
|
|
import json
|
|
import logging
|
|
import os
|
|
import pprint
|
|
import shutil
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import traceback
|
|
import urllib.request
|
|
from io import StringIO
|
|
from pathlib import Path
|
|
from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
|
|
Tuple, Union
|
|
|
|
# Host triple the optimized build is produced for; must be provided by the CI
# environment (fails fast with KeyError if missing).
PGO_HOST = os.environ["PGO_HOST"]

LOGGER = logging.getLogger("stage-build")

# rustc-perf benchmark crates used to gather LLVM PGO profiles.
LLVM_PGO_CRATES = [
    "syn-1.0.89",
    "cargo-0.60.0",
    "serde-1.0.136",
    "ripgrep-13.0.0",
    "regex-1.5.5",
    "clap-3.1.6",
    "hyper-0.14.18"
]

# rustc-perf benchmark crates used to gather rustc PGO profiles; includes
# several stress tests that exercise the compiler frontend.
RUSTC_PGO_CRATES = [
    "externs",
    "ctfe-stress-5",
    "cargo-0.60.0",
    "token-stream-stress",
    "match-stress",
    "tuple-stress",
    "diesel-1.4.8",
    "bitmaps-3.1.0"
]

# BOLT profiling reuses the same benchmark set as LLVM PGO.
LLVM_BOLT_CRATES = LLVM_PGO_CRATES
|
|
|
|
|
|
def is_try_build() -> bool:
    """Return True when this CI job is a try build (DIST_TRY_BUILD set to a non-"0" value)."""
    value = os.environ.get("DIST_TRY_BUILD", "0")
    return value != "0"
|
|
|
|
|
|
class Pipeline:
    """
    Platform-specific paths and helpers for the multi-stage optimized build.

    Subclasses supply the concrete locations used on a given CI platform.
    """

    # Paths
    def checkout_path(self) -> Path:
        """Root of the source checkout."""
        raise NotImplementedError

    def downloaded_llvm_dir(self) -> Path:
        """Location of the pre-downloaded host LLVM toolchain."""
        raise NotImplementedError

    def build_root(self) -> Path:
        """Top-level directory in which the build takes place."""
        raise NotImplementedError

    def build_artifacts(self) -> Path:
        """Per-host build output directory (stage0/stage2/llvm live here)."""
        return self.build_root() / "build" / PGO_HOST

    def rustc_stage_0(self) -> Path:
        """Bootstrap (stage0) rustc binary."""
        stage0 = self.build_artifacts() / "stage0"
        return stage0 / "bin" / "rustc"

    def cargo_stage_0(self) -> Path:
        """Bootstrap (stage0) cargo binary."""
        stage0 = self.build_artifacts() / "stage0"
        return stage0 / "bin" / "cargo"

    def rustc_stage_2(self) -> Path:
        """Freshly built (stage2) rustc binary."""
        stage2 = self.build_artifacts() / "stage2"
        return stage2 / "bin" / "rustc"

    def opt_artifacts(self) -> Path:
        """Scratch directory for profiles and other optimization artifacts."""
        raise NotImplementedError

    def llvm_profile_dir_root(self) -> Path:
        """Directory that collects raw LLVM PGO profiles."""
        return self.opt_artifacts() / "llvm-pgo"

    def llvm_profile_merged_file(self) -> Path:
        """Merged LLVM PGO profile (.profdata)."""
        return self.opt_artifacts() / "llvm-pgo.profdata"

    def rustc_perf_dir(self) -> Path:
        """Checkout of the rustc-perf benchmark suite."""
        return self.opt_artifacts() / "rustc-perf"

    def build_rustc_perf(self):
        """Obtain the rustc-perf suite and build its collector binary."""
        raise NotImplementedError()

    def rustc_profile_dir_root(self) -> Path:
        """Directory that collects raw rustc PGO profiles."""
        return self.opt_artifacts() / "rustc-pgo"

    def rustc_profile_merged_file(self) -> Path:
        """Merged rustc PGO profile (.profdata)."""
        return self.opt_artifacts() / "rustc-pgo.profdata"

    def rustc_profile_template_path(self) -> Path:
        """
        Template path for raw rustc PGO profiles.

        The profile data is written into a single filepath that is being repeatedly merged when each
        rustc invocation ends. Empirically, this can result in some profiling data being lost. That's
        why we override the profile path to include the PID. This will produce many more profiling
        files, but the resulting profile will produce a slightly faster rustc binary.
        """
        return self.rustc_profile_dir_root() / "default_%m_%p.profraw"

    def supports_bolt(self) -> bool:
        """Whether BOLT instrumentation is available on this platform."""
        raise NotImplementedError

    def llvm_bolt_profile_merged_file(self) -> Path:
        """Merged LLVM BOLT profile."""
        return self.opt_artifacts() / "bolt.profdata"

    def metrics_path(self) -> Path:
        """Location of bootstrap's metrics.json file."""
        return self.build_root() / "build" / "metrics.json"
|
|
|
|
|
|
class LinuxPipeline(Pipeline):
    """Pipeline for the Linux CI container; paths come from the Dockerfile image."""

    def checkout_path(self) -> Path:
        return Path("/checkout")

    def downloaded_llvm_dir(self) -> Path:
        return Path("/rustroot")

    def build_root(self) -> Path:
        return self.checkout_path() / "obj"

    def opt_artifacts(self) -> Path:
        return Path("/tmp/tmp-multistage/opt-artifacts")

    def build_rustc_perf(self):
        """Copy the pre-fetched rustc-perf checkout and build its collector."""
        # /tmp/rustc-perf comes from the Dockerfile
        shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir())
        cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()])

        with change_cwd(self.rustc_perf_dir()):
            build_env = {
                "RUSTC": str(self.rustc_stage_0()),
                "RUSTC_BOOTSTRAP": "1",
            }
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=build_env)

    def supports_bolt(self) -> bool:
        return True
|
|
|
|
|
|
class WindowsPipeline(Pipeline):
    """Pipeline for the Windows CI runner; everything lives under the checkout directory."""

    def __init__(self):
        # The CI job's working directory doubles as the checkout root.
        self.checkout_dir = Path(os.getcwd())

    def checkout_path(self) -> Path:
        return self.checkout_dir

    def downloaded_llvm_dir(self) -> Path:
        return self.checkout_path() / "citools" / "clang-rust"

    def build_root(self) -> Path:
        return self.checkout_path()

    def opt_artifacts(self) -> Path:
        return self.checkout_path() / "opt-artifacts"

    def rustc_stage_0(self) -> Path:
        # Windows binaries carry an .exe suffix.
        return super().rustc_stage_0().with_suffix(".exe")

    def cargo_stage_0(self) -> Path:
        return super().cargo_stage_0().with_suffix(".exe")

    def rustc_stage_2(self) -> Path:
        return super().rustc_stage_2().with_suffix(".exe")

    def build_rustc_perf(self):
        """Download a pinned rustc-perf snapshot and build its collector."""
        # rustc-perf version from 2023-03-15
        perf_commit = "8b2ac3042e1ff2c0074455a0a3618adef97156b1"
        rustc_perf_zip_path = self.opt_artifacts() / "perf.zip"

        def download_rustc_perf():
            # Fetch, unpack and rename the archive; retried as one unit below
            # so a partial download/unpack is redone from scratch.
            download_file(
                f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip",
                rustc_perf_zip_path
            )
            with change_cwd(self.opt_artifacts()):
                unpack_archive(rustc_perf_zip_path)
                move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir())
                delete_file(rustc_perf_zip_path)

        retry_action(download_rustc_perf, "Download rustc-perf")

        with change_cwd(self.rustc_perf_dir()):
            cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict(
                RUSTC=str(self.rustc_stage_0()),
                RUSTC_BOOTSTRAP="1"
            ))

    def rustc_profile_template_path(self) -> Path:
        """
        On Windows, we don't have enough space to use separate files for each rustc invocation.
        Therefore, we use a single file for the generated profiles.
        """
        return self.rustc_profile_dir_root() / "default_%m.profraw"

    def supports_bolt(self) -> bool:
        return False
|
|
|
|
|
|
def get_timestamp() -> float:
    """Current wall-clock time, in seconds since the Unix epoch."""
    now = time.time()
    return now
|
|
|
|
|
|
# Type alias: a duration expressed in (fractional) seconds.
Duration = float
|
|
|
|
|
|
def iterate_timers(timer: "Timer", name: str, level: int = 0) -> "Iterator[Tuple[int, str, Duration]]":
    """
    Walk a timer tree depth-first (pre-order), yielding one
    (nesting level, section name, total duration) triple per node.
    """
    stack = [(level, name, timer)]
    while stack:
        depth, label, node = stack.pop()
        yield (depth, label, node.total_duration())
        # Push children in reverse so they are visited in their original order.
        for child_label, child in reversed(node.children):
            stack.append((depth + 1, child_label, child))
|
|
|
|
|
|
class Timer:
    """Hierarchical wall-clock timer used to report per-section build durations."""

    def __init__(self, parent_names: Tuple[str, ...] = ()):
        # (name, Timer) pairs of completed child sections, in execution order.
        self.children: List[Tuple[str, Timer]] = []
        # Guards against opening two concurrent sections on the same timer.
        self.section_active = False
        # Names of all enclosing sections, used to render "a > b > c" labels.
        self.parent_names = parent_names
        # Time spent in this section itself, excluding nested sections.
        self.duration_excluding_children: Duration = 0

    @contextlib.contextmanager
    def section(self, name: str) -> ContextManager["Timer"]:
        """
        Time a named section. Yields a fresh child Timer so nested sections can
        be opened; the elapsed time is recorded even when the block raises.
        """
        assert not self.section_active
        self.section_active = True

        start = get_timestamp()
        exc = None

        child_timer = Timer(parent_names=self.parent_names + (name,))
        full_name = " > ".join(child_timer.parent_names)
        try:
            LOGGER.info(f"Section `{full_name}` starts")
            yield child_timer
        except BaseException as exception:
            # Remember the failure for the log message below, then propagate.
            exc = exception
            raise
        finally:
            end = get_timestamp()
            duration = end - start

            # Nested sections already recorded their own time; keep only the
            # remainder as this section's own duration.
            child_timer.duration_excluding_children = duration - child_timer.total_duration()
            self.add_child(name, child_timer)
            if exc is None:
                LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
            else:
                LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
            self.section_active = False

    def total_duration(self) -> Duration:
        """Own duration plus the total duration of all (transitive) children."""
        return self.duration_excluding_children + sum(
            c.total_duration() for (_, c) in self.children)

    def has_children(self) -> bool:
        return len(self.children) > 0

    def print_stats(self):
        """Log a formatted table of all recorded sections and their durations."""
        rows = []
        for (child_name, child_timer) in self.children:
            for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
                label = f"{' ' * level}{name}:"
                rows.append((label, duration))

        # Empty row
        rows.append(("", ""))

        total_duration_label = "Total duration:"
        total_duration = self.total_duration()
        rows.append((total_duration_label, humantime(total_duration)))

        space_after_label = 2
        max_label_length = max(16, max(len(label) for (label, _) in rows)) + space_after_label

        table_width = max_label_length + 23
        divider = "-" * table_width

        with StringIO() as output:
            print(divider, file=output)
            for (label, duration) in rows:
                # Numeric rows get "seconds (percentage)"; string rows (the
                # humantime total and the spacer) are simply right-aligned.
                if isinstance(duration, Duration):
                    pct = (duration / total_duration) * 100
                    value = f"{duration:>12.2f}s ({pct:>5.2f}%)"
                else:
                    value = f"{duration:>{len(total_duration_label) + 7}}"
                print(f"{label:<{max_label_length}} {value}", file=output)
            print(divider, file=output, end="")
            LOGGER.info(f"Timer results\n{output.getvalue()}")

    def add_child(self, name: str, timer: "Timer"):
        self.children.append((name, timer))

    def add_duration(self, name: str, duration: Duration):
        """Record an externally measured duration as a child section."""
        timer = Timer(parent_names=self.parent_names + (name,))
        timer.duration_excluding_children = duration
        self.add_child(name, timer)
|
|
|
|
|
|
class BuildStep:
    """A node of the bootstrap step tree parsed from metrics.json."""

    def __init__(self, type: str, children: List["BuildStep"], duration: float):
        self.type = type
        self.children = children
        self.duration = duration

    def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
        """Yield this step and every descendant whose type matches `type`."""
        if self.type == type:
            yield self
        for node in self.children:
            yield from node.find_all_by_type(type)

    def __repr__(self):
        return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
|
|
|
|
|
|
def load_last_metrics(path: Path) -> Optional[BuildStep]:
    """
    Loads the metrics of the most recent bootstrap execution from a metrics.json file.

    Returns None when the metrics file does not exist yet (e.g. the build failed
    very early) — callers such as `record_metrics` already check for None.
    """
    if not path.is_file():
        return None

    with open(path, "r") as f:
        metrics = json.load(f)
    # Bootstrap appends one entry per invocation; the last one is this build.
    invocation = metrics["invocations"][-1]

    def parse(entry) -> Optional[BuildStep]:
        # Only `rustbuild_step` entries form the step tree; other kinds are skipped.
        if "kind" not in entry or entry["kind"] != "rustbuild_step":
            return None
        type = entry.get("type", "")
        # Our own duration plus the durations of all recursively parsed children.
        duration = entry.get("duration_excluding_children_sec", 0)
        children = []

        for child in entry.get("children", ()):
            step = parse(child)
            if step is not None:
                children.append(step)
                duration += step.duration
        return BuildStep(type=type, children=children, duration=duration)

    # Drop non-step entries: a None child in the list would crash later tree
    # traversals such as `find_all_by_type`.
    children = [step for step in (parse(child) for child in invocation.get("children", ()))
                if step is not None]
    return BuildStep(
        type="root",
        children=children,
        duration=invocation.get("duration_including_children_sec", 0)
    )
|
|
|
|
|
|
@contextlib.contextmanager
def change_cwd(dir: Path):
    """
    Temporarily change working directory to `dir`.

    The previous working directory is restored when the block exits,
    even if it raises.
    """
    cwd = os.getcwd()
    LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`")
    os.chdir(dir)
    try:
        yield
    finally:
        LOGGER.debug(f"Reverting working dir to `{cwd}`")
        os.chdir(cwd)
|
|
|
|
|
|
def humantime(time_s: float) -> str:
    """
    Format a duration in seconds as e.g. "1h 2m 5s".

    The value is rounded to whole seconds *before* being split into components,
    so that e.g. 59.6s renders as "1m 0s" instead of the carry-less "60s" the
    previous per-component rounding produced. Hour and minute components are
    omitted when zero; seconds are always shown.
    """
    total = int(round(time_s))
    hours, remainder = divmod(total, 3600)
    minutes, seconds = divmod(remainder, 60)

    result = ""
    if hours > 0:
        result += f"{int(hours)}h "
    if minutes > 0:
        result += f"{int(minutes)}m "
    result += f"{round(seconds)}s"
    return result
|
|
|
|
|
|
def move_path(src: Path, dst: Path):
    """Move a file or directory from `src` to `dst`, logging the operation."""
    LOGGER.info(f"Moving `{src}` to `{dst}`")
    shutil.move(src, dst)
|
|
|
|
|
|
def delete_file(path: Path):
    """Delete a single file, logging the operation."""
    LOGGER.info(f"Deleting file `{path}`")
    os.unlink(path)
|
|
|
|
|
|
def delete_directory(path: Path):
    """Recursively delete a directory tree, logging the operation."""
    LOGGER.info(f"Deleting directory `{path}`")
    shutil.rmtree(path)
|
|
|
|
|
|
def unpack_archive(archive: Path):
    """Unpack an archive into the current working directory, logging the operation."""
    LOGGER.info(f"Unpacking archive `{archive}`")
    shutil.unpack_archive(archive)
|
|
|
|
|
|
def download_file(src: str, target: Path):
    """Download `src` (a URL) into the local file `target`, logging the operation."""
    LOGGER.info(f"Downloading `{src}` into `{target}`")
    urllib.request.urlretrieve(src, str(target))
|
|
|
|
|
|
def retry_action(action, name: str, max_fails: int = 5):
    """
    Run `action` (a nullary callable), retrying up to `max_fails` times.

    Each failure is logged with its traceback; raises Exception when every
    attempt fails.
    """
    LOGGER.info(f"Attempting to perform action `{name}` with retry")
    for iteration in range(max_fails):
        LOGGER.info(f"Attempt {iteration + 1}/{max_fails}")
        try:
            action()
            return
        # A bare `except:` here would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors should trigger a retry.
        except Exception:
            LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}")

    raise Exception(f"Action `{name}` has failed after {max_fails} attempts")
|
|
|
|
|
|
def cmd(
        args: List[Union[str, Path]],
        env: Optional[Dict[str, str]] = None,
        output_path: Optional[Path] = None
):
    """
    Run a subprocess, raising on a non-zero exit code (check=True).

    `env` entries are *added to a copy* of the current environment rather than
    replacing it. When `output_path` is given, stdout is redirected into that
    file. Returns the CompletedProcess.
    """
    args = [str(arg) for arg in args]

    environment = os.environ.copy()

    # Build a human-readable "ENV=V ... cmd args > out" string for the log.
    cmd_str = ""
    if env is not None:
        environment.update(env)
        cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items())
        cmd_str += " "
    cmd_str += " ".join(args)
    if output_path is not None:
        cmd_str += f" > {output_path}"
    LOGGER.info(f"Executing `{cmd_str}`")

    if output_path is not None:
        with open(output_path, "w") as f:
            return subprocess.run(
                args,
                env=environment,
                check=True,
                stdout=f
            )
    return subprocess.run(args, env=environment, check=True)
|
|
|
|
class BenchmarkRunner:
    """Interface for running workloads that exercise an instrumented compiler."""

    def run_rustc(self, pipeline: Pipeline):
        """Run the workload used to collect rustc PGO profiles."""
        raise NotImplementedError

    def run_llvm(self, pipeline: Pipeline):
        """Run the workload used to collect LLVM PGO profiles."""
        raise NotImplementedError

    def run_bolt(self, pipeline: Pipeline):
        """Run the workload used to collect LLVM BOLT profiles."""
        raise NotImplementedError
|
|
|
|
class DefaultBenchmarkRunner(BenchmarkRunner):
    """Profile gathering driven by the standard rustc-perf benchmark sets."""

    def run_rustc(self, pipeline: Pipeline):
        # Here we're profiling the `rustc` frontend, so we also include `Check`.
        # The benchmark set includes various stress tests that put the frontend under pressure.
        run_compiler_benchmarks(
            pipeline,
            profiles=["Check", "Debug", "Opt"],
            scenarios=["All"],
            crates=RUSTC_PGO_CRATES,
            env={"LLVM_PROFILE_FILE": str(pipeline.rustc_profile_template_path())}
        )

    def run_llvm(self, pipeline: Pipeline):
        run_compiler_benchmarks(
            pipeline,
            profiles=["Debug", "Opt"],
            scenarios=["Full"],
            crates=LLVM_PGO_CRATES
        )

    def run_bolt(self, pipeline: Pipeline):
        run_compiler_benchmarks(
            pipeline,
            profiles=["Check", "Debug", "Opt"],
            scenarios=["Full"],
            crates=LLVM_BOLT_CRATES
        )
|
|
|
|
def run_compiler_benchmarks(
        pipeline: Pipeline,
        profiles: List[str],
        scenarios: List[str],
        crates: List[str],
        env: Optional[Dict[str, str]] = None
):
    """
    Exercise the stage-2 compiler so instrumentation writes out profiles.

    Compiles libcore directly and then drives the selected rustc-perf
    benchmarks. `env` is forwarded to every compiler invocation (e.g. to set
    LLVM_PROFILE_FILE).
    """
    env = env if env is not None else {}

    # Compile libcore, both in opt-level=0 and opt-level=3
    with change_cwd(pipeline.build_root()):
        cmd([
            pipeline.rustc_stage_2(),
            "--edition", "2021",
            "--crate-type", "lib",
            str(pipeline.checkout_path() / "library/core/src/lib.rs"),
            "--out-dir", pipeline.opt_artifacts()
        ], env=dict(RUSTC_BOOTSTRAP="1", **env))

        cmd([
            pipeline.rustc_stage_2(),
            "--edition", "2021",
            "--crate-type", "lib",
            "-Copt-level=3",
            str(pipeline.checkout_path() / "library/core/src/lib.rs"),
            "--out-dir", pipeline.opt_artifacts()
        ], env=dict(RUSTC_BOOTSTRAP="1", **env))

    # Run rustc-perf benchmarks
    # Benchmark using profile_local with eprintln, which essentially just means
    # don't actually benchmark -- just make sure we run rustc a bunch of times.
    with change_cwd(pipeline.rustc_perf_dir()):
        cmd([
            pipeline.cargo_stage_0(),
            "run",
            "-p", "collector", "--bin", "collector", "--",
            "profile_local", "eprintln",
            pipeline.rustc_stage_2(),
            "--id", "Test",
            "--cargo", pipeline.cargo_stage_0(),
            "--profiles", ",".join(profiles),
            "--scenarios", ",".join(scenarios),
            "--include", ",".join(crates)
        ], env=dict(
            RUST_LOG="collector=debug",
            RUSTC=str(pipeline.rustc_stage_0()),
            RUSTC_BOOTSTRAP="1",
            **env
        ))
|
|
|
|
|
|
# https://stackoverflow.com/a/31631711/1107768
def format_bytes(size: int) -> str:
    """Return the given bytes as a human friendly KiB, MiB or GiB string."""
    KIB = 1024

    # Below one KiB the size is printed as a plain byte count.
    if size < KIB:
        return f"{size} B"
    # Each (unit, upper bound) pair covers one order of magnitude; values at or
    # above one TiB fall through and are printed as a raw number.
    for unit, bound in (("KiB", KIB ** 2), ("MiB", KIB ** 3), ("GiB", KIB ** 4)):
        if size < bound:
            return f"{size * KIB / bound:.2f} {unit}"
    return str(size)
|
|
|
|
|
|
# https://stackoverflow.com/a/63307131/1107768
def count_files(path: Path) -> int:
    """Count regular files anywhere under `path` (recursively)."""
    total = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total += 1
    return total
|
|
|
|
|
|
def count_files_with_prefix(path: Path) -> int:
    """Count regular files whose path begins with the prefix `path`."""
    matches = glob.glob(f"{path}*")
    return len([match for match in matches if Path(match).is_file()])
|
|
|
|
|
|
# https://stackoverflow.com/a/55659577/1107768
def get_path_size(path: Path) -> int:
    """Size in bytes of a file, or the summed sizes of all entries under a directory."""
    if not path.is_dir():
        return path.stat().st_size
    return sum(entry.stat().st_size for entry in path.rglob("*"))
|
|
|
|
|
|
def get_path_prefix_size(path: Path) -> int:
    """
    Get size of all files beginning with the prefix `path`.
    Alternative to shell `du -sh <path>*`.
    """
    total = 0
    for match in glob.glob(f"{path}*"):
        total += Path(match).stat().st_size
    return total
|
|
|
|
|
|
def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]:
    """Yield the direct entries of `directory`, optionally keeping only those accepted by `filter`."""
    for name in os.listdir(directory):
        candidate = directory / name
        if filter is None or filter(candidate):
            yield candidate
|
|
|
|
|
|
def build_rustc(
        pipeline: Pipeline,
        args: List[str],
        env: Optional[Dict[str, str]] = None
):
    """Invoke `x.py build` for a stage-2 `library/std` build on the PGO host, plus extra args."""
    invocation = [
        sys.executable,
        pipeline.checkout_path() / "x.py",
        "build",
        "--target", PGO_HOST,
        "--host", PGO_HOST,
        "--stage", "2",
        "library/std"
    ]
    invocation.extend(args)
    cmd(invocation, env=env)
|
|
|
|
|
|
def create_pipeline() -> Pipeline:
    """Select the Pipeline implementation matching the current platform."""
    platform = sys.platform
    if platform == "linux":
        return LinuxPipeline()
    if platform in ("cygwin", "win32"):
        return WindowsPipeline()
    raise Exception(f"Optimized build is not supported for platform {sys.platform}")
|
|
|
|
|
|
def gather_llvm_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
    """Run benchmarks against PGO-instrumented LLVM, then merge the raw profiles."""
    LOGGER.info("Running benchmarks with PGO instrumented LLVM")

    runner.run_llvm(pipeline)

    profile_path = pipeline.llvm_profile_merged_file()
    LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}")
    # Merge with llvm-profdata from the *downloaded* host LLVM — the in-tree
    # LLVM is the instrumented one at this point.
    cmd([
        pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata",
        "merge",
        "-o", profile_path,
        pipeline.llvm_profile_dir_root()
    ])

    LOGGER.info("LLVM PGO statistics")
    LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
    LOGGER.info(
        f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}")
    LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(pipeline.llvm_profile_dir_root())
|
|
|
|
|
|
def gather_rustc_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
    """Run benchmarks against PGO-instrumented rustc, then merge the raw profiles."""
    LOGGER.info("Running benchmarks with PGO instrumented rustc")

    runner.run_rustc(pipeline)

    profile_path = pipeline.rustc_profile_merged_file()
    LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}")
    # Use llvm-profdata from the freshly built LLVM in the build artifacts.
    cmd([
        pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata",
        "merge",
        "-o", profile_path,
        pipeline.rustc_profile_dir_root()
    ])

    LOGGER.info("Rustc PGO statistics")
    LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}")
    LOGGER.info(
        f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}")
    LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}")

    # We don't need the individual .profraw files now that they have been merged
    # into a final .profdata
    delete_directory(pipeline.rustc_profile_dir_root())
|
|
|
|
|
|
def gather_llvm_bolt_profiles(pipeline: Pipeline, runner: BenchmarkRunner):
    """Run benchmarks against BOLT-instrumented LLVM, then merge the .fdata profiles."""
    LOGGER.info("Running benchmarks with BOLT instrumented LLVM")

    runner.run_bolt(pipeline)

    merged_profile_path = pipeline.llvm_bolt_profile_merged_file()
    # NOTE(review): the raw profiles are expected under this fixed /tmp prefix
    # (presumably written during run_bolt) — confirm against the BOLT setup.
    profile_files_path = Path("/tmp/prof.fdata")
    LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}")

    # Sort the inputs for a deterministic merge order.
    profile_files = sorted(glob.glob(f"{profile_files_path}*"))
    cmd([
        "merge-fdata",
        *profile_files,
    ], output_path=merged_profile_path)

    LOGGER.info("LLVM BOLT statistics")
    LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}")
    LOGGER.info(
        f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}")
    LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}")
|
|
|
|
|
|
def clear_llvm_files(pipeline: Pipeline):
    """
    Rustbuild currently doesn't support rebuilding LLVM when PGO options
    change (or any other llvm-related options); so just clear out the relevant
    directories ourselves.
    """
    LOGGER.info("Clearing LLVM build files")
    delete_directory(pipeline.build_artifacts() / "llvm")
    delete_directory(pipeline.build_artifacts() / "lld")
|
|
|
|
|
|
def print_binary_sizes(pipeline: Pipeline):
    """Log a table of the stage2 binaries and shared libraries with their sizes."""
    bin_dir = pipeline.build_artifacts() / "stage2" / "bin"
    binaries = get_files(bin_dir)

    lib_dir = pipeline.build_artifacts() / "stage2" / "lib"
    # NOTE(review): only `.so` files are matched, so Windows `.dll`s are not
    # reported by this table — confirm whether that is intentional.
    libraries = get_files(lib_dir, lambda p: p.suffix == ".so")

    paths = sorted(binaries) + sorted(libraries)
    with StringIO() as output:
        for path in paths:
            path_str = f"{path.name}:"
            print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
        LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
|
|
|
|
|
|
def print_free_disk_space(pipeline: Pipeline):
    """Log free/total disk space of the filesystem holding the optimization artifacts."""
    usage = shutil.disk_usage(pipeline.opt_artifacts())
    total = usage.total
    used = usage.used
    free = usage.free

    # Use the module logger (as the rest of this script does) instead of the
    # root logger, so the message carries the "stage-build" logger name.
    LOGGER.info(
        f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
|
|
|
|
|
|
def log_metrics(step: BuildStep):
    """Log an indented table of all build steps with their durations."""
    substeps: List[Tuple[int, BuildStep]] = []

    def visit(step: BuildStep, level: int):
        # Flatten the step tree depth-first, remembering each node's depth.
        substeps.append((level, step))
        for child in step.children:
            visit(child, level=level + 1)

    visit(step, 0)

    # Close the StringIO deterministically via the context manager, and use the
    # module logger for consistency with the rest of this script.
    with StringIO() as output:
        for (level, substep) in substeps:
            label = f"{'.' * level}{substep.type}"
            print(f"{label:<65}{substep.duration:>8.2f}s", file=output)
        LOGGER.info(f"Build step durations\n{output.getvalue()}")
|
|
|
|
|
|
def record_metrics(pipeline: Pipeline, timer: Timer):
    """Extract LLVM and rustc build durations from bootstrap's metrics into `timer`."""
    metrics = load_last_metrics(pipeline.metrics_path())
    if metrics is None:
        return
    llvm_steps = tuple(metrics.find_all_by_type("bootstrap::llvm::Llvm"))
    assert len(llvm_steps) > 0
    llvm_duration = sum(step.duration for step in llvm_steps)

    rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc"))
    assert len(rustc_steps) > 0
    rustc_duration = sum(step.duration for step in rustc_steps)

    # The LLVM step is part of the Rustc step
    rustc_duration -= llvm_duration

    timer.add_duration("LLVM", llvm_duration)
    timer.add_duration("Rustc", rustc_duration)

    log_metrics(metrics)
|
|
|
|
|
|
def execute_build_pipeline(timer: Timer, pipeline: Pipeline, runner: BenchmarkRunner, final_build_args: List[str]):
    """
    Run the full multi-stage optimized build.

    Stages 1-3 build instrumented compilers and gather LLVM PGO, rustc PGO and
    (where supported) LLVM BOLT profiles; each stage appends the corresponding
    `--*-profile-use` flags to `final_build_args` (mutated in place), which are
    then executed as the final dist build in stage 4.
    """
    # Clear and prepare tmp directory
    shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
    os.makedirs(pipeline.opt_artifacts(), exist_ok=True)

    pipeline.build_rustc_perf()

    # Stage 1: Build rustc + PGO instrumented LLVM
    with timer.section("Stage 1 (LLVM PGO)") as stage1:
        with stage1.section("Build rustc and LLVM") as rustc_build:
            build_rustc(pipeline, args=[
                "--llvm-profile-generate"
            ], env=dict(
                # %p makes each process write into its own profile directory.
                LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
            ))
            record_metrics(pipeline, rustc_build)

        with stage1.section("Gather profiles"):
            gather_llvm_profiles(pipeline, runner)
    print_free_disk_space(pipeline)

    # LLVM must be rebuilt from scratch for the next stage (see clear_llvm_files).
    clear_llvm_files(pipeline)
    final_build_args += [
        "--llvm-profile-use",
        pipeline.llvm_profile_merged_file()
    ]

    # Stage 2: Build PGO instrumented rustc + LLVM
    with timer.section("Stage 2 (rustc PGO)") as stage2:
        with stage2.section("Build rustc and LLVM") as rustc_build:
            build_rustc(pipeline, args=[
                "--rust-profile-generate",
                pipeline.rustc_profile_dir_root()
            ])
            record_metrics(pipeline, rustc_build)

        with stage2.section("Gather profiles"):
            gather_rustc_profiles(pipeline, runner)
    print_free_disk_space(pipeline)

    clear_llvm_files(pipeline)
    final_build_args += [
        "--rust-profile-use",
        pipeline.rustc_profile_merged_file()
    ]

    # Stage 3: Build rustc + BOLT instrumented LLVM
    if pipeline.supports_bolt():
        with timer.section("Stage 3 (LLVM BOLT)") as stage3:
            with stage3.section("Build rustc and LLVM") as rustc_build:
                # Apply the PGO profiles gathered above while instrumenting with BOLT.
                build_rustc(pipeline, args=[
                    "--llvm-profile-use",
                    pipeline.llvm_profile_merged_file(),
                    "--llvm-bolt-profile-generate",
                    "--rust-profile-use",
                    pipeline.rustc_profile_merged_file()
                ])
                record_metrics(pipeline, rustc_build)

            with stage3.section("Gather profiles"):
                gather_llvm_bolt_profiles(pipeline, runner)

        # LLVM is not being cleared here, we want to reuse the previous build
        print_free_disk_space(pipeline)
        final_build_args += [
            "--llvm-bolt-profile-use",
            pipeline.llvm_bolt_profile_merged_file()
        ]

    # Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
    with timer.section("Stage 4 (final build)") as stage4:
        cmd(final_build_args)
        record_metrics(pipeline, stage4)
|
|
|
|
|
|
def run(runner: BenchmarkRunner):
    """
    Entry point: configure logging, trim the dist build for try builds,
    and run the multi-stage pipeline with the given benchmark runner.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(name)s %(levelname)-4s: %(message)s",
    )

    LOGGER.info(f"Running multi-stage build using Python {sys.version}")
    LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}")

    # Remaining CLI arguments are the final dist build invocation.
    build_args = sys.argv[1:]

    # Skip components that are not needed for try builds to speed them up
    if is_try_build():
        LOGGER.info("Skipping building of unimportant components for a try build")
        for target in ("rust-docs", "rustc-docs", "rust-docs-json", "rust-analyzer",
                       "rustc-src", "clippy", "miri", "rustfmt"):
            build_args.extend(["--exclude", target])

    timer = Timer()
    pipeline = create_pipeline()

    try:
        execute_build_pipeline(timer, pipeline, runner, build_args)
    except BaseException as e:
        LOGGER.error("The multi-stage build has failed")
        raise e
    finally:
        # Always report timings and disk usage, even on failure.
        timer.print_stats()
        print_free_disk_space(pipeline)

    print_binary_sizes(pipeline)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the pipeline with the default rustc-perf runner.
    run(DefaultBenchmarkRunner())
|