coverage-dump: Include filenames hash in covfun line data
This commit is contained in:
parent
bc3f0e326a
commit
f1b8cd433f
4 changed files with 102 additions and 26 deletions
|
|
@ -777,6 +777,7 @@ name = "coverage-dump"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools",
|
||||
"leb128",
|
||||
"md-5",
|
||||
"miniz_oxide 0.7.4",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.71"
|
||||
itertools = "0.12"
|
||||
leb128 = "0.2.5"
|
||||
md5 = { package = "md-5" , version = "0.10.5" }
|
||||
miniz_oxide = "0.7.1"
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
use std::collections::HashMap;
|
||||
use std::fmt::{self, Debug, Write as _};
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use anyhow::{Context, anyhow};
|
||||
use anyhow::{Context, anyhow, ensure};
|
||||
use itertools::Itertools;
|
||||
use regex::Regex;
|
||||
|
||||
use crate::llvm_utils::unescape_llvm_string_contents;
|
||||
use crate::parser::Parser;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub(crate) fn dump_covfun_mappings(
|
||||
llvm_ir: &str,
|
||||
function_names: &HashMap<u64, String>,
|
||||
|
|
@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
|
|||
// each entry with its (demangled) name.
|
||||
let mut covfun_entries = llvm_ir
|
||||
.lines()
|
||||
.filter_map(covfun_line_data)
|
||||
.map(|line_data| (function_names.get(&line_data.name_hash).map(String::as_str), line_data))
|
||||
.collect::<Vec<_>>();
|
||||
.filter(|line| is_covfun_line(line))
|
||||
.map(parse_covfun_line)
|
||||
.map_ok(|line_data| {
|
||||
(function_names.get(&line_data.name_hash).map(String::as_str), line_data)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
covfun_entries.sort_by(|a, b| {
|
||||
// Sort entries primarily by name, to help make the order consistent
|
||||
// across platforms and relatively insensitive to changes.
|
||||
|
|
@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Data extracted from a single `@__covrec_…` line of LLVM IR assembly.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// True when the `@__covrec_[HASH]` symbol name carries the trailing `u` suffix.
    is_used: bool,
    /// Function name hash, taken from the first `i64` field of the record.
    name_hash: u64,
    /// Filenames hash, taken from the last `i64` field of the record.
    filenames_hash: u64,
    /// Unescaped bytes of the record's trailing `[N x i8] c"…"` string.
    payload: Vec<u8>,
}
|
||||
|
||||
/// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
|
||||
/// entry, and if so extracts relevant data in a `CovfunLineData`.
|
||||
fn covfun_line_data(line: &str) -> Option<CovfunLineData> {
|
||||
let re = {
|
||||
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
|
||||
// rather than the section name, because the section name is harder to
|
||||
// extract and differs across Linux/Windows/macOS. We also extract the
|
||||
// symbol name hash from the variable name rather than the data, since
|
||||
// it's easier and both should match.
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
RE.get_or_init(|| {
|
||||
Regex::new(
|
||||
r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"#,
|
||||
)
|
||||
.unwrap()
|
||||
})
|
||||
};
|
||||
/// Returns `true` when `line` begins with the `@__covrec_` variable-name
/// prefix, i.e. when it looks like a covfun record definition.
fn is_covfun_line(line: &str) -> bool {
    const COVREC_PREFIX: &str = "@__covrec_";
    line.strip_prefix(COVREC_PREFIX).is_some()
}
|
||||
|
||||
let captures = re.captures(line)?;
|
||||
let name_hash = u64::from_str_radix(&captures["name_hash"], 16).unwrap();
|
||||
/// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
|
||||
/// entry, parses it to extract relevant data in a `CovfunLineData`.
|
||||
fn parse_covfun_line(line: &str) -> anyhow::Result<CovfunLineData> {
|
||||
ensure!(is_covfun_line(line));
|
||||
|
||||
// We cheat a little bit and match variable names `@__covrec_[HASH]u`
|
||||
// rather than the section name, because the section name is harder to
|
||||
// extract and differs across Linux/Windows/macOS.
|
||||
const RE_STRING: &str = r#"(?x)^
|
||||
@__covrec_[0-9A-Z]+(?<is_used>u)?
|
||||
\ = \ # (trailing space)
|
||||
.*
|
||||
<\{
|
||||
\ i64 \ (?<name_hash> -? [0-9]+),
|
||||
\ i32 \ -? [0-9]+, # (length of payload; currently unused)
|
||||
\ i64 \ -? [0-9]+, # (source hash; currently unused)
|
||||
\ i64 \ (?<filenames_hash> -? [0-9]+),
|
||||
\ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
|
||||
\ # (trailing space)
|
||||
}>
|
||||
.*$
|
||||
"#;
|
||||
static RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(RE_STRING).unwrap());
|
||||
|
||||
let captures =
|
||||
RE.captures(line).with_context(|| format!("couldn't parse covfun line: {line:?}"))?;
|
||||
let is_used = captures.name("is_used").is_some();
|
||||
let name_hash = i64::from_str_radix(&captures["name_hash"], 10).unwrap() as u64;
|
||||
let filenames_hash = i64::from_str_radix(&captures["filenames_hash"], 10).unwrap() as u64;
|
||||
let payload = unescape_llvm_string_contents(&captures["payload"]);
|
||||
|
||||
Some(CovfunLineData { name_hash, is_used, payload })
|
||||
Ok(CovfunLineData { is_used, name_hash, filenames_hash, payload })
|
||||
}
|
||||
|
||||
// Extra parser methods only needed when parsing `covfun` payloads.
|
||||
|
|
|
|||
53
src/tools/coverage-dump/src/covfun/tests.rs
Normal file
53
src/tools/coverage-dump/src/covfun/tests.rs
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
use super::{CovfunLineData, parse_covfun_line};
|
||||
|
||||
/// LLVM IR integers carry no signedness of their own, and the textual format
/// usually prints them as signed values. This helper reinterprets such an
/// `i64` as the `u64` with the same bit pattern.
fn as_u64(x: i64) -> u64 {
    u64::from_ne_bytes(x.to_ne_bytes())
}
|
||||
|
||||
#[test]
fn parse_covfun_line_data() {
    /// Builds the expected parse result for a fixture line. Hashes are given
    /// in the signed form that appears in the `.ll` text.
    fn expected(name_hash: i64, filenames_hash: i64, payload: &[u8]) -> CovfunLineData {
        CovfunLineData {
            is_used: true,
            name_hash: as_u64(name_hash),
            filenames_hash: as_u64(filenames_hash),
            payload: payload.to_vec(),
        }
    }

    // Each fixture pairs a raw IR line with the data it should parse into.
    let fixtures = [
        // Copied from `trivial.ll`:
        (
            r#"@__covrec_49A9BAAE5F896E81u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 5307978893922758273, i32 9, i64 445092354169400020, i64 6343436898695299756, [9 x i8] c"\01\01\00\01\01\03\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            expected(
                5307978893922758273,
                6343436898695299756,
                b"\x01\x01\x00\x01\x01\x03\x01\x00\x0D",
            ),
        ),
        // Copied from `on-off-sandwich.ll`:
        (
            r#"@__covrec_D0CE53C5E64F319Au = linkonce_odr hidden constant <{ i64, i32, i64, i64, [14 x i8] }> <{ i64 -3400688559180533350, i32 14, i64 7307957714577672185, i64 892196767019953100, [14 x i8] c"\01\01\00\02\01\10\05\02\10\01\07\05\00\06" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            expected(
                -3400688559180533350,
                892196767019953100,
                b"\x01\x01\x00\x02\x01\x10\x05\x02\x10\x01\x07\x05\x00\x06",
            ),
        ),
        // Copied from `no-core.ll`:
        (
            r#"@__covrec_F8016FC82D46106u = linkonce_odr hidden constant <{ i64, i32, i64, i64, [9 x i8] }> <{ i64 1116917981370409222, i32 9, i64 -8857254680411629915, i64 -3625186110715410276, [9 x i8] c"\01\01\00\01\01\0C\01\00\0D" }>, section "__LLVM_COV,__llvm_covfun", align 8"#,
            expected(
                1116917981370409222,
                -3625186110715410276,
                b"\x01\x01\x00\x01\x01\x0C\x01\x00\x0D",
            ),
        ),
    ];

    for (line, want) in &fixtures {
        // Echo the fixture so a failing case is easy to identify in test output.
        println!("- {line}");
        let outcome = parse_covfun_line(line).map_err(|err| err.to_string());
        assert_eq!(outcome.as_ref(), Ok(want));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue