coverage-dump: Extract a common parser method for maybe-compressed bytes

This commit is contained in:
Zalathar 2025-04-23 17:47:01 +10:00
parent 89319f2e12
commit bc3f0e326a
2 changed files with 41 additions and 23 deletions

View file

@ -1,7 +1,11 @@
use std::borrow::Cow;
use std::sync::OnceLock;
use anyhow::{anyhow, ensure};
use regex::bytes;
use crate::parser::Parser;
#[cfg(test)]
mod tests;
@ -44,3 +48,38 @@ pub(crate) fn truncated_md5(bytes: &[u8]) -> u64 {
// or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
u64::from_le_bytes(hash)
}
impl<'a> Parser<'a> {
    /// Parses one "maybe-compressed" chunk and returns its payload in
    /// uncompressed form.
    ///
    /// The chunk is laid out as:
    /// 1. A ULEB128 value: the payload's size in bytes once uncompressed.
    /// 2. A ULEB128 value: the payload's size in bytes while compressed, or 0
    ///    to indicate that the payload is stored without compression.
    /// 3. The payload bytes themselves.
    ///
    /// When the payload is stored uncompressed, the result borrows directly
    /// from the parser's input. When it is zlib-compressed, the result owns a
    /// freshly decompressed buffer.
    ///
    /// # Errors
    ///
    /// Fails if either length or the payload cannot be read, if decompression
    /// fails (output is limited to the declared uncompressed size), or if the
    /// decompressed output is not exactly the declared uncompressed size.
    pub(crate) fn read_chunk_to_uncompressed_bytes(&mut self) -> anyhow::Result<Cow<'a, [u8]>> {
        let uncompressed_len = self.read_uleb128_usize()?;
        let compressed_len = self.read_uleb128_usize()?;

        // A compressed length of zero marks a payload that was stored as-is,
        // so hand back a borrowed view of it without copying.
        if compressed_len == 0 {
            return Ok(Cow::Borrowed(self.read_n_bytes(uncompressed_len)?));
        }

        // Otherwise the payload is a zlib stream. Inflate it, capping the
        // output at the size the chunk header promised.
        let compressed_bytes = self.read_n_bytes(compressed_len)?;
        let uncompressed_bytes = match miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
            compressed_bytes,
            uncompressed_len,
        ) {
            Ok(inflated) => inflated,
            Err(e) => return Err(anyhow!("{e:?}")),
        };

        // The limit above only bounds the output from above; also reject
        // streams that inflate to fewer bytes than declared.
        ensure!(uncompressed_bytes.len() == uncompressed_len);
        Ok(Cow::Owned(uncompressed_bytes))
    }
}

View file

@ -1,7 +1,6 @@
use std::collections::HashMap;
use std::sync::OnceLock;
use anyhow::{anyhow, ensure};
use regex::Regex;
use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
@ -43,26 +42,8 @@ pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap
for payload in llvm_ir.lines().filter_map(prf_names_payload).map(unescape_llvm_string_contents)
{
let mut parser = Parser::new(&payload);
let uncompressed_len = parser.read_uleb128_usize()?;
let compressed_len = parser.read_uleb128_usize()?;
let uncompressed_bytes_vec;
let uncompressed_bytes: &[u8] = if compressed_len == 0 {
// The symbol name bytes are uncompressed, so read them directly.
parser.read_n_bytes(uncompressed_len)?
} else {
// The symbol name bytes are compressed, so read and decompress them.
let compressed_bytes = parser.read_n_bytes(compressed_len)?;
uncompressed_bytes_vec = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
compressed_bytes,
uncompressed_len,
)
.map_err(|e| anyhow!("{e:?}"))?;
ensure!(uncompressed_bytes_vec.len() == uncompressed_len);
&uncompressed_bytes_vec
};
let uncompressed_bytes = parser.read_chunk_to_uncompressed_bytes()?;
parser.ensure_empty()?;
// Symbol names in the payload are separated by `0x01` bytes.
for raw_name in uncompressed_bytes.split(|&b| b == 0x01) {
@ -70,8 +51,6 @@ pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result<HashMap
let demangled = demangle_if_able(raw_name)?;
map.insert(hash, demangled);
}
parser.ensure_empty()?;
}
Ok(map)