coverage-dump: Include filenames hash in covfun line data

This commit is contained in:
Zalathar 2025-04-22 21:49:57 +10:00
parent bc3f0e326a
commit f1b8cd433f
4 changed files with 102 additions and 26 deletions

View file

@ -777,6 +777,7 @@ name = "coverage-dump"
version = "0.1.0"
dependencies = [
"anyhow",
"itertools",
"leb128",
"md-5",
"miniz_oxide 0.7.4",

View file

@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0.71"
itertools = "0.12"
leb128 = "0.2.5"
md5 = { package = "md-5" , version = "0.10.5" }
miniz_oxide = "0.7.1"

View file

@ -1,13 +1,17 @@
use std::collections::HashMap;
use std::fmt::{self, Debug, Write as _};
use std::sync::OnceLock;
use std::sync::LazyLock;
use anyhow::{Context, anyhow};
use anyhow::{Context, anyhow, ensure};
use itertools::Itertools;
use regex::Regex;
use crate::llvm_utils::unescape_llvm_string_contents;
use crate::parser::Parser;
#[cfg(test)]
mod tests;
pub(crate) fn dump_covfun_mappings(
llvm_ir: &str,
function_names: &HashMap<u64, String>,
@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
// each entry with its (demangled) name.
let mut covfun_entries = llvm_ir
.lines()
.filter_map(covfun_line_data)
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
.collect::<Vec<_>>();
.filter(|line| is_covfun_line(line))
.map(parse_covfun_line)
.map_ok(|line_data| {
(function_names.get(&line_data.name_hash).map(String::as_str), line_data)
})
.collect::<Result<Vec<_>, _>>()?;
covfun_entries.sort_by(|a, b| {
// Sort entries primarily by name, to help make the order consistent
// across platforms and relatively insensitive to changes.
@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
Ok(())
}
/// Data extracted from a single `@__covrec_*` line of LLVM IR assembly.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// Whether the record's variable name ends with the optional `u` suffix.
    is_used: bool,
    /// First `i64` field of the record, reinterpreted as unsigned.
    name_hash: u64,
    /// Last `i64` field of the record, reinterpreted as unsigned.
    filenames_hash: u64,
    /// Bytes of the record's trailing `[N x i8]` string, after unescaping.
    payload: Vec<u8>,
}
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
/// entry, and if so extracts relevant data in a `CovfunLineData`.
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
let re = {
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
// rather than the section name, because the section name is harder to
// extract and differs across Linux/Windows/macOS. We also extract the
// symbol name hash from the variable name rather than the data, since
// it's easier and both should match.
static RE: OnceLock<Regex> = OnceLock::new();
RE.get_or_init(|| {
Regex::new(
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
)
.unwrap()
})
};
/// Returns `true` when `line` begins with the `@__covrec_` variable-name
/// prefix. Only the prefix is inspected; the rest of the line is not checked.
fn is_covfun_line(line: &str) -> bool {
    const COVREC_PREFIX: &str = "@__covrec_";
    line.strip_prefix(COVREC_PREFIX).is_some()
}
let captures = re.captures(line)?;
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
/// entry, parses it to extract relevant data in a `CovfunLineData`.
///
/// # Errors
///
/// Returns an error if `is_covfun_line` rejects the line, if the line does
/// not match the expected record layout, or if either captured hash is a
/// decimal number that does not fit in an `i64`.
fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
    ensure!(is_covfun_line(line));

    // We cheat a little bit and match variable names `@__covrec_[HASH]u`
    // rather than the section name, because the section name is harder to
    // extract and differs across Linux/Windows/macOS.
    //
    // The pattern uses verbose mode (`(?x)`), so literal spaces are written
    // as `\ ` and `#` starts a comment inside the pattern.
    const RE_STRING: &str = r#"(?x)^
@__covrec_[0-9A-Z]+(?<is_used>u)?
\ = \ # (trailing space)
.*
<\{
\ i64 \ (?<name_hash> -? [0-9]+),
\ i32 \ -? [0-9]+, # (length of payload; currently unused)
\ i64 \ -? [0-9]+, # (source hash; currently unused)
\ i64 \ (?<filenames_hash> -? [0-9]+),
\ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
\ # (trailing space)
}>
.*$
"#;
    // The pattern is a constant, so failing to compile it is a bug here.
    static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());

    let captures =
        RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;

    // The regex only guarantees an optionally-negated run of digits, not that
    // the value fits in 64 bits, so report overflow as an error rather than
    // panicking.
    let parse_hash = |field: &str| -> anyhow::Result<u64> {
        let text = &captures[field];
        let signed = text
            .parse::<i64>()
            .with_context(|| format!("covfun `{field}` does not fit in an i64: {text:?}"))?;
        // Deliberate bit-for-bit reinterpretation: the hash is parsed as a
        // signed decimal but stored as an unsigned value.
        Ok(signed as u64)
    };

    let is_used = captures.name("is_used").is_some();
    let name_hash = parse_hash("name_hash")?;
    let filenames_hash = parse_hash("filenames_hash")?;
    let payload = unescape_llvm_string_contents(&captures["payload"]);

    Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
}
// Extra parser methods only needed when parsing `covfun` payloads.

View file

@ -0,0 +1,53 @@
use super::{CovfunLineData, parse_covfun_line};
/// LLVM IR integers carry no inherent signedness, and the textual format
/// tends to print them as signed values. This helper reinterprets the bits of
/// an `i64` as a `u64` so expected values can be written the way they appear
/// in the IR.
fn as_u64(x: i64) -> u64 {
    u64::from_ne_bytes(x.to_ne_bytes())
}
/// Feeds real `@__covrec_*` lines through `parse_covfun_line` and checks the
/// extracted fields, including hashes that the IR prints as negative numbers.
#[test]
fn parse_covfun_line_data() {
    /// Builds the expected record for a sample whose variable name has the
    /// `u` suffix, taking the hashes in the signed form the IR text uses.
    fn used_record(name_hash: i64, filenames_hash: i64, payload: &[u8]) -> CovfunLineData {
        CovfunLineData {
            is_used: true,
            name_hash: as_u64(name_hash),
            filenames_hash: as_u64(filenames_hash),
            payload: payload.to_vec(),
        }
    }

    let samples: [(&str, CovfunLineData); 3] = [
        // Copied from `trivial.ll`:
        (
            r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                5307978893922758273,
                6343436898695299756,
                b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D",
            ),
        ),
        // Copied from `on-off-sandwich.ll`:
        (
            r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                -3400688559180533350,
                892196767019953100,
                b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06",
            ),
        ),
        // Copied from `no-core.ll`:
        (
            r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            used_record(
                1116917981370409222,
                -3625186110715410276,
                b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D",
            ),
        ),
    ];

    for (line, expected) in &samples {
        // Print the input first so a failing case is easy to spot in the output.
        println!("- {line}");
        // Compare errors as strings so an unexpected failure shows its message.
        let parsed = parse_covfun_line(line).map_err(|err| err.to_string());
        assert_eq!(parsed.as_ref(), Ok(expected));
    }
}