Add diff of bootstrap steps

This commit is contained in:
Jakub Beránek 2025-03-25 13:43:25 +01:00
parent 6df2d58933
commit 813783e711
5 changed files with 161 additions and 39 deletions

View file

@ -109,6 +109,8 @@ pub struct BuildStep {
pub r#type: String,
pub children: Vec<BuildStep>,
pub duration: Duration,
// Full name of the step, including all parent names
pub full_name: String,
}
impl BuildStep {
@ -116,7 +118,7 @@ impl BuildStep {
/// The most important thing is that the build step aggregates the
/// durations of all children, so that it can be easily accessed.
pub fn from_invocation(invocation: &JsonInvocation) -> Self {
fn parse(node: &JsonNode) -> Option<BuildStep> {
fn parse(node: &JsonNode, parent_name: &str) -> Option<BuildStep> {
match node {
JsonNode::RustbuildStep {
type_: kind,
@ -124,11 +126,14 @@ impl BuildStep {
duration_excluding_children_sec,
..
} => {
let children: Vec<_> = children.into_iter().filter_map(parse).collect();
let full_name = format!("{parent_name}-{kind}");
let children: Vec<_> =
children.into_iter().filter_map(|s| parse(s, &full_name)).collect();
let children_duration = children.iter().map(|c| c.duration).sum::<Duration>();
Some(BuildStep {
r#type: kind.to_string(),
children,
full_name,
duration: children_duration
+ Duration::from_secs_f64(*duration_excluding_children_sec),
})
@ -138,8 +143,13 @@ impl BuildStep {
}
let duration = Duration::from_secs_f64(invocation.duration_including_children_sec);
let children: Vec<_> = invocation.children.iter().filter_map(parse).collect();
Self { r#type: "total".to_string(), children, duration }
// The root "total" step is kind of a virtual step that encompasses all other steps,
// but it is not a real parent of the other steps.
// We thus start the parent name hierarchy here and use an empty string
// as the parent name of the top-level steps.
let children: Vec<_> = invocation.children.iter().filter_map(|s| parse(s, "")).collect();
Self { r#type: "total".to_string(), children, duration, full_name: "total".to_string() }
}
pub fn find_all_by_type(&self, r#type: &str) -> Vec<&Self> {
@ -159,7 +169,7 @@ impl BuildStep {
/// Returns a Vec with all substeps, ordered by their hierarchical order.
/// The first element of the tuple is the depth of a given step.
fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> {
pub fn linearize_steps(&self) -> Vec<(u32, &BuildStep)> {
let mut substeps: Vec<(u32, &BuildStep)> = Vec::new();
fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) {
@ -180,14 +190,14 @@ pub fn format_build_steps(root: &BuildStep) -> String {
let mut output = String::new();
for (level, step) in root.linearize_steps() {
let label = format!(
"{}{}",
".".repeat(level as usize),
// Bootstrap steps can be generic and thus contain angle brackets (<...>).
// However, Markdown interprets these as HTML, so we need to escap ethem.
step.r#type.replace('<', "&lt;").replace('>', "&gt;")
);
let label = format!("{}{}", ".".repeat(level as usize), escape_step_name(step));
writeln!(output, "{label:.<65}{:>8.2}s", step.duration.as_secs_f64()).unwrap();
}
output
}
/// Bootstrap steps can be generic and thus contain angle brackets (<...>).
/// However, Markdown interprets these as HTML, so we need to escap ethem.
pub fn escape_step_name(step: &BuildStep) -> String {
step.r#type.replace('<', "&lt;").replace('>', "&gt;")
}

View file

@ -107,6 +107,7 @@ dependencies = [
"build_helper",
"clap",
"csv",
"diff",
"glob-match",
"insta",
"serde",
@ -241,6 +242,12 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "diff"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
[[package]]
name = "displaydoc"
version = "0.2.5"

View file

@ -7,6 +7,7 @@ edition = "2021"
anyhow = "1"
clap = { version = "4.5", features = ["derive"] }
csv = "1"
diff = "0.1"
glob-match = "0.2"
serde = { version = "1", features = ["derive"] }
serde_yaml = "0.9"

View file

@ -1,33 +1,134 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Debug;
use build_helper::metrics::{
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, format_build_steps,
BuildStep, JsonRoot, TestOutcome, TestSuite, TestSuiteMetadata, escape_step_name,
format_build_steps,
};
use crate::metrics;
use crate::metrics::{JobMetrics, JobName, get_test_suites};
use crate::utils::{output_details, pluralize};
pub fn output_bootstrap_stats(metrics: &JsonRoot) {
/// Outputs durations of individual bootstrap steps from the gathered bootstrap invocations,
/// and also a table with summarized information about executed tests.
pub fn output_bootstrap_stats(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) {
if !metrics.invocations.is_empty() {
println!("# Bootstrap steps");
record_bootstrap_step_durations(&metrics);
record_bootstrap_step_durations(&metrics, parent_metrics);
record_test_suites(&metrics);
}
}
fn record_bootstrap_step_durations(metrics: &JsonRoot) {
fn record_bootstrap_step_durations(metrics: &JsonRoot, parent_metrics: Option<&JsonRoot>) {
let parent_steps: HashMap<String, BuildStep> = parent_metrics
.map(|metrics| {
metrics
.invocations
.iter()
.map(|invocation| {
(invocation.cmdline.clone(), BuildStep::from_invocation(invocation))
})
.collect()
})
.unwrap_or_default();
for invocation in &metrics.invocations {
let step = BuildStep::from_invocation(invocation);
let table = format_build_steps(&step);
eprintln!("Step `{}`\n{table}\n", invocation.cmdline);
output_details(&invocation.cmdline, || {
output_details(&format!("{} (steps)", invocation.cmdline), || {
println!("<pre><code>{table}</code></pre>");
});
// If there was a parent bootstrap invocation with the same cmdline, diff it
if let Some(parent_step) = parent_steps.get(&invocation.cmdline) {
let table = format_build_step_diffs(&step, parent_step);
let duration_before = parent_step.duration.as_secs();
let duration_after = step.duration.as_secs();
output_details(
&format!("{} (diff) ({duration_before}s -> {duration_after}s)", invocation.cmdline),
|| {
println!("{table}");
},
);
}
}
eprintln!("Recorded {} bootstrap invocation(s)", metrics.invocations.len());
}
/// Creates a table that displays a diff between the durations of steps between
/// two bootstrap invocations.
/// It also shows steps that were missing before/after.
fn format_build_step_diffs(current: &BuildStep, parent: &BuildStep) -> String {
use std::fmt::Write;
// Helper struct that compares steps by their full name
struct StepByName<'a>((u32, &'a BuildStep));
impl<'a> PartialEq for StepByName<'a> {
fn eq(&self, other: &Self) -> bool {
self.0.1.full_name.eq(&other.0.1.full_name)
}
}
fn get_steps(step: &BuildStep) -> Vec<StepByName> {
step.linearize_steps().into_iter().map(|v| StepByName(v)).collect()
}
let steps_before = get_steps(parent);
let steps_after = get_steps(current);
let mut table = "| Step | Before | After | Change |\n".to_string();
writeln!(table, "|:-----|-------:|------:|-------:|").unwrap();
// Try to match removed, added and same steps using a classic diff algorithm
for result in diff::slice(&steps_before, &steps_after) {
let (step, before, after, change) = match result {
// The step was found both before and after
diff::Result::Both(before, after) => {
let duration_before = before.0.1.duration;
let duration_after = after.0.1.duration;
let pct_change = duration_after.as_secs_f64() / duration_before.as_secs_f64();
let pct_change = pct_change * 100.0;
// Normalize around 100, to get + for regression and - for improvements
let pct_change = pct_change - 100.0;
(
before,
format!("{:.2}s", duration_before.as_secs_f64()),
format!("{:.2}s", duration_after.as_secs_f64()),
format!("{pct_change:.1}%"),
)
}
// The step was only found in the parent, so it was likely removed
diff::Result::Left(removed) => (
removed,
format!("{:.2}s", removed.0.1.duration.as_secs_f64()),
"".to_string(),
"(removed)".to_string(),
),
// The step was only found in the current commit, so it was likely added
diff::Result::Right(added) => (
added,
"".to_string(),
format!("{:.2}s", added.0.1.duration.as_secs_f64()),
"(added)".to_string(),
),
};
let prefix = ".".repeat(step.0.0 as usize);
writeln!(
table,
"| {prefix}{} | {before} | {after} | {change} |",
escape_step_name(step.0.1),
)
.unwrap();
}
table
}
fn record_test_suites(metrics: &JsonRoot) {
let suites = metrics::get_test_suites(&metrics);
@ -82,8 +183,7 @@ fn render_table(suites: BTreeMap<String, TestSuiteRecord>) -> String {
table
}
/// Computes a post merge CI analysis report of test differences
/// between the `parent` and `current` commits.
/// Outputs a report of test differences between the `parent` and `current` commits.
pub fn output_test_diffs(job_metrics: HashMap<JobName, JobMetrics>) {
let aggregated_test_diffs = aggregate_test_diffs(&job_metrics);
report_test_diffs(aggregated_test_diffs);

View file

@ -144,31 +144,35 @@ fn postprocess_metrics(
job_name: Option<String>,
) -> anyhow::Result<()> {
let metrics = load_metrics(&metrics_path)?;
output_bootstrap_stats(&metrics);
let (Some(parent), Some(job_name)) = (parent, job_name) else {
return Ok(());
};
if let (Some(parent), Some(job_name)) = (parent, job_name) {
// This command is executed also on PR builds, which might not have parent metrics
// available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
// due to missing permissions.
// To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
// we simply print an error if the parent metrics were not found, but otherwise exit
// successfully.
match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
Ok(parent_metrics) => {
output_bootstrap_stats(&metrics, Some(&parent_metrics));
// This command is executed also on PR builds, which might not have parent metrics
// available, because some PR jobs don't run on auto builds, and PR jobs do not upload metrics
// due to missing permissions.
// To avoid having to detect if this is a PR job, and to avoid having failed steps in PR jobs,
// we simply print an error if the parent metrics were not found, but otherwise exit
// successfully.
match download_job_metrics(&job_name, &parent).context("cannot download parent metrics") {
Ok(parent_metrics) => {
let job_metrics = HashMap::from([(
job_name,
JobMetrics { parent: Some(parent_metrics), current: metrics },
)]);
output_test_diffs(job_metrics);
}
Err(error) => {
eprintln!("Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}");
let job_metrics = HashMap::from([(
job_name,
JobMetrics { parent: Some(parent_metrics), current: metrics },
)]);
output_test_diffs(job_metrics);
return Ok(());
}
Err(error) => {
eprintln!(
"Metrics for job `{job_name}` and commit `{parent}` not found: {error:?}"
);
}
}
}
output_bootstrap_stats(&metrics, None);
Ok(())
}