From bc3f0e326a5ef77bb4e3531db5190109d8e2420b Mon Sep 17 00:00:00 2001
From: Zalathar
Date: Wed, 23 Apr 2025 17:47:01 +1000
Subject: [PATCH] coverage-dump: Extract a common parser method for maybe-compressed bytes

---
 src/tools/coverage-dump/src/llvm_utils.rs | 39 +++++++++++++++++++++++
 src/tools/coverage-dump/src/prf_names.rs  | 25 ++-------------
 2 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/src/tools/coverage-dump/src/llvm_utils.rs b/src/tools/coverage-dump/src/llvm_utils.rs
index 017fdbec0fc2..92322b256a82 100644
--- a/src/tools/coverage-dump/src/llvm_utils.rs
+++ b/src/tools/coverage-dump/src/llvm_utils.rs
@@ -1,7 +1,11 @@
+use std::borrow::Cow;
 use std::sync::OnceLock;
 
+use anyhow::{anyhow, ensure};
 use regex::bytes;
 
+use crate::parser::Parser;
+
 #[cfg(test)]
 mod tests;
 
@@ -44,3 +48,38 @@ pub(crate) fn truncated_md5(bytes: &[u8]) -> u64 {
     // or target platform. (See `MD5Result::low` in LLVM's `MD5.h`.)
     u64::from_le_bytes(hash)
 }
+
+impl<'a> Parser<'a> {
+    /// Reads a sequence of:
+    /// - Length of uncompressed data in bytes, as ULEB128
+    /// - Length of compressed data in bytes (or 0), as ULEB128
+    /// - The indicated number of compressed or uncompressed bytes
+    ///
+    /// If the number of compressed bytes is 0, the subsequent bytes are
+    /// uncompressed. Otherwise, the subsequent bytes are compressed, and will
+    /// be decompressed.
+    ///
+    /// Returns the uncompressed bytes that were read directly or decompressed.
+    pub(crate) fn read_chunk_to_uncompressed_bytes(&mut self) -> anyhow::Result<Cow<'a, [u8]>> {
+        let uncompressed_len = self.read_uleb128_usize()?;
+        let compressed_len = self.read_uleb128_usize()?;
+
+        if compressed_len == 0 {
+            // The bytes are uncompressed, so read them directly.
+            let uncompressed_bytes = self.read_n_bytes(uncompressed_len)?;
+            Ok(Cow::Borrowed(uncompressed_bytes))
+        } else {
+            // The bytes are compressed, so read and decompress them.
+            let compressed_bytes = self.read_n_bytes(compressed_len)?;
+
+            let uncompressed_bytes = miniz_oxide::inflate::decompress_to_vec_zlib_with_limit(
+                compressed_bytes,
+                uncompressed_len,
+            )
+            .map_err(|e| anyhow!("{e:?}"))?;
+            ensure!(uncompressed_bytes.len() == uncompressed_len);
+
+            Ok(Cow::Owned(uncompressed_bytes))
+        }
+    }
+}
diff --git a/src/tools/coverage-dump/src/prf_names.rs b/src/tools/coverage-dump/src/prf_names.rs
index fe193efd8e5f..f9ab35deba50 100644
--- a/src/tools/coverage-dump/src/prf_names.rs
+++ b/src/tools/coverage-dump/src/prf_names.rs
@@ -1,7 +1,6 @@
 use std::collections::HashMap;
 use std::sync::OnceLock;
 
-use anyhow::{anyhow, ensure};
 use regex::Regex;
 
 use crate::llvm_utils::{truncated_md5, unescape_llvm_string_contents};
@@ -43,26 +42,8 @@ pub(crate) fn make_function_names_table(llvm_ir: &str) -> anyhow::Result anyhow::Result