Add benchmarks using iai-callgrind

This crate [1] makes it reasonably easy to get instruction count
performance metrics that are stable enough to run in CI, and has worked
out well since integrating it with `libm`. Add new benchmarks for `mem`
functions using `iai-callgrind`, modeling them off of the existing
benchmarks.

[1]: https://github.com/iai-callgrind/iai-callgrind
This commit is contained in:
Trevor Gross 2025-03-20 11:19:09 +00:00 committed by Trevor Gross
parent a0d40f287a
commit f0f9cfa463
2 changed files with 488 additions and 0 deletions

View file

@ -16,6 +16,8 @@ doctest = false
rand_xoshiro = "0.6"
# To compare float builtins against
rustc_apfloat = "0.2.1"
# Really a dev dependency, but dev dependencies can't be optional
iai-callgrind = { version = "0.14.0", optional = true }
[dependencies.compiler_builtins]
path = "../compiler-builtins"
@ -47,9 +49,16 @@ no-sys-f16-f64-convert = []
# Skip tests that rely on f16 symbols being available on the system
no-sys-f16 = ["no-sys-f16-f64-convert"]
# Enable icount benchmarks (requires iai-callgrind and valgrind)
icount = ["dep:iai-callgrind"]
# Enable report generation without bringing in more dependencies by default
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
# NOTE: benchmarks must be run with `--no-default-features` or with
# `-p testcrate`, otherwise the default `compiler-builtins` feature of the
# `compiler_builtins` crate gets activated, resulting in linker errors.
[[bench]]
name = "float_add"
harness = false
@ -85,3 +94,8 @@ harness = false
[[bench]]
name = "float_pow"
harness = false
[[bench]]
name = "mem_icount"
harness = false
required-features = ["icount"]

View file

@ -0,0 +1,474 @@
//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
//! is stable enough to be tested in CI.
use std::hint::black_box;
use std::{ops, slice};
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
use iai_callgrind::{library_benchmark, library_benchmark_group, main};
/// Size of a memory page in bytes; every benchmark buffer is built from whole pages.
const PAGE_SIZE: usize = 0x1000;

/// One page-sized, page-aligned chunk of bytes.
///
/// `align(..)` only accepts a literal, so the value below has to be kept equal to
/// `PAGE_SIZE` by hand; the assertion following the type enforces that at compile time.
#[derive(Clone)]
#[repr(C, align(0x1000))]
struct Page([u8; PAGE_SIZE]);

const _: () = assert!(std::mem::align_of::<Page>() == PAGE_SIZE);

/// A buffer that is page-aligned by default, with an optional offset to create a
/// misalignment.
///
/// Dereferences to the `len` bytes that start `offset` bytes into the page-aligned
/// allocation.
struct AlignedSlice {
    /// Backing storage, always a whole number of pages.
    buf: Box<[Page]>,
    /// Number of bytes exposed through `Deref`/`DerefMut`.
    len: usize,
    /// Distance in bytes from the start of `buf` to the first exposed byte.
    offset: usize,
}

impl AlignedSlice {
    /// Allocate a zeroed slice of `len` bytes that starts `offset` bytes past a
    /// `PAGE_SIZE`-aligned address.
    ///
    /// # Panics
    ///
    /// Panics if `offset >= PAGE_SIZE` or if `len + offset` overflows `usize`.
    fn new_zeroed(len: usize, offset: usize) -> Self {
        assert!(offset < PAGE_SIZE);
        // A wrapped sum would under-allocate and make the `Deref` impls hand out an
        // out-of-bounds slice, so reject it up front.
        let total_len = len
            .checked_add(offset)
            .expect("`len + offset` overflows `usize`");
        // Round up to a whole number of pages.
        let items = total_len.div_ceil(PAGE_SIZE);
        let buf = vec![Page([0u8; PAGE_SIZE]); items].into_boxed_slice();
        AlignedSlice { buf, len, offset }
    }
}

impl ops::Deref for AlignedSlice {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        // SAFETY: `new_zeroed` allocates enough pages to hold `offset + len` bytes, so the
        // range `[offset, offset + len)` is inside `buf`. `Page` is a `repr(C)` wrapper
        // around `[u8; PAGE_SIZE]` whose alignment equals its size, so the pages are
        // contiguous, padding-free, initialized bytes.
        unsafe { slice::from_raw_parts(self.buf.as_ptr().cast::<u8>().add(self.offset), self.len) }
    }
}

impl ops::DerefMut for AlignedSlice {
    fn deref_mut(&mut self) -> &mut Self::Target {
        // SAFETY: same bounds argument as in `deref`; `&mut self` guarantees exclusive
        // access to the backing pages for the lifetime of the returned slice.
        unsafe {
            slice::from_raw_parts_mut(
                self.buf.as_mut_ptr().cast::<u8>().add(self.offset),
                self.len,
            )
        }
    }
}
mod mcpy {
use super::*;
/// Parameters for a single `memcpy` benchmark case.
struct Cfg {
/// Number of bytes to copy.
len: usize,
/// Offset of the source buffer from page alignment, in bytes.
s_off: usize,
/// Offset of the destination buffer from page alignment, in bytes.
d_off: usize,
}
/// Prepare the inputs for one `memcpy` case.
///
/// Prints the case parameters, then returns `(len, src, dst)` where `src` is filled with
/// `1` bytes and `dst` is left zeroed.
fn setup(cfg: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
    let (len, s_off, d_off) = (cfg.len, cfg.s_off, cfg.d_off);
    println!("{len} bytes, {s_off} src offset, {d_off} dst offset");

    let mut src = AlignedSlice::new_zeroed(len, s_off);
    let dst = AlignedSlice::new_zeroed(len, d_off);
    // Make the source distinguishable from the zeroed destination.
    for byte in src.iter_mut() {
        *byte = 1;
    }

    (len, src, dst)
}
// Benchmark `memcpy` for a range of lengths in three alignment scenarios. Each case is
// built by `setup`, which returns `(len, src, dst)`.
#[library_benchmark]
#[benches::aligned(
    // Source and destination both start on a page boundary.
    args = [
        Cfg { len: 16, s_off: 0, d_off: 0 },
        Cfg { len: 28, s_off: 0, d_off: 0 },
        Cfg { len: 32, s_off: 0, d_off: 0 },
        Cfg { len: 36, s_off: 0, d_off: 0 },
        Cfg { len: 60, s_off: 0, d_off: 0 },
        Cfg { len: 64, s_off: 0, d_off: 0 },
        Cfg { len: 68, s_off: 0, d_off: 0 },
        Cfg { len: 128, s_off: 0, d_off: 0 },
        Cfg { len: 256, s_off: 0, d_off: 0 },
        Cfg { len: 512, s_off: 0, d_off: 0 },
        Cfg { len: 1024, s_off: 0, d_off: 0 },
        Cfg { len: 4096, s_off: 0, d_off: 0 },
        Cfg { len: 1048576, s_off: 0, d_off: 0 },
    ],
    setup = setup,
)]
#[benches::offset(
    // Source and destination share the same offset from page alignment.
    args = [
        Cfg { len: 16, s_off: 65, d_off: 65 },
        Cfg { len: 28, s_off: 65, d_off: 65 },
        Cfg { len: 32, s_off: 65, d_off: 65 },
        Cfg { len: 36, s_off: 65, d_off: 65 },
        Cfg { len: 60, s_off: 65, d_off: 65 },
        Cfg { len: 64, s_off: 65, d_off: 65 },
        Cfg { len: 68, s_off: 65, d_off: 65 },
        Cfg { len: 128, s_off: 65, d_off: 65 },
        Cfg { len: 256, s_off: 65, d_off: 65 },
        Cfg { len: 512, s_off: 65, d_off: 65 },
        Cfg { len: 1024, s_off: 65, d_off: 65 },
        Cfg { len: 4096, s_off: 65, d_off: 65 },
        Cfg { len: 1048576, s_off: 65, d_off: 65 },
    ],
    setup = setup,
)]
#[benches::misaligned(
    // Source and destination have different offsets from page alignment.
    args = [
        Cfg { len: 16, s_off: 65, d_off: 66 },
        Cfg { len: 28, s_off: 65, d_off: 66 },
        Cfg { len: 32, s_off: 65, d_off: 66 },
        Cfg { len: 36, s_off: 65, d_off: 66 },
        Cfg { len: 60, s_off: 65, d_off: 66 },
        Cfg { len: 64, s_off: 65, d_off: 66 },
        Cfg { len: 68, s_off: 65, d_off: 66 },
        Cfg { len: 128, s_off: 65, d_off: 66 },
        Cfg { len: 256, s_off: 65, d_off: 66 },
        Cfg { len: 512, s_off: 65, d_off: 66 },
        Cfg { len: 1024, s_off: 65, d_off: 66 },
        Cfg { len: 4096, s_off: 65, d_off: 66 },
        Cfg { len: 1048576, s_off: 65, d_off: 66 },
    ],
    setup = setup,
)]
// The tuple order must match what `setup` returns, `(len, src, dst)`, so that `s_off`
// and `d_off` apply to the buffers they are named after.
fn bench((len, src, mut dst): (usize, AlignedSlice, AlignedSlice)) {
    // SAFETY: `setup` creates both buffers with exactly `len` bytes, and they are separate
    // allocations, so the source and destination regions do not overlap.
    unsafe {
        black_box(memcpy(
            black_box(dst.as_mut_ptr()),
            black_box(src.as_ptr()),
            black_box(len),
        ));
    }
}
library_benchmark_group!(name = memcpy; benchmarks = bench);
}
mod mset {
use super::*;
/// Parameters for a single `memset` benchmark case.
struct Cfg {
/// Number of bytes to set.
len: usize,
/// Offset of the destination buffer from page alignment, in bytes.
offset: usize,
}
/// Prepare the input for one `memset` case.
///
/// Prints the case parameters and returns `(len, dst)` with `dst` zero-initialized.
fn setup(cfg: Cfg) -> (usize, AlignedSlice) {
    let Cfg { len, offset } = cfg;
    println!("{len} bytes, {offset} offset");
    let dst = AlignedSlice::new_zeroed(len, offset);
    (len, dst)
}
// Benchmark `memset` for a range of lengths, with the destination either page-aligned or
// offset from page alignment. Each case is built by `setup`, which returns `(len, dst)`.
#[library_benchmark]
#[benches::aligned(
// Destination starts on a page boundary.
args = [
Cfg { len: 16, offset: 0 },
Cfg { len: 32, offset: 0 },
Cfg { len: 64, offset: 0 },
Cfg { len: 512, offset: 0 },
Cfg { len: 4096, offset: 0 },
Cfg { len: 1048576, offset: 0 },
],
setup = setup,
)]
#[benches::offset(
// Destination starts 65 bytes past a page boundary.
args = [
Cfg { len: 16, offset: 65 },
Cfg { len: 32, offset: 65 },
Cfg { len: 64, offset: 65 },
Cfg { len: 512, offset: 65 },
Cfg { len: 4096, offset: 65 },
Cfg { len: 1048576, offset: 65 },
],
setup = setup,
)]
fn bench((len, mut dst): (usize, AlignedSlice)) {
// SAFETY: `setup` creates `dst` with exactly `len` bytes.
unsafe {
black_box(memset(
black_box(dst.as_mut_ptr()),
// Arbitrary fill value; `black_box` keeps it from being treated as a constant.
black_box(27),
black_box(len),
));
}
}
library_benchmark_group!(name = memset; benchmarks = bench);
}
mod mcmp {
use super::*;
/// Parameters for a single `memcmp` benchmark case.
struct Cfg {
/// Number of bytes to compare.
len: usize,
/// Offset of the first buffer from page alignment, in bytes.
s_off: usize,
/// Offset of the second buffer from page alignment, in bytes.
d_off: usize,
}
/// Prepare the inputs for one `memcmp` case.
///
/// Prints the case parameters and returns `(len, b1, b2)`. Both buffers are zeroed except
/// for the final byte of `b2`, so the two only differ at index `len - 1`.
fn setup(Cfg { len, s_off, d_off }: Cfg) -> (usize, AlignedSlice, AlignedSlice) {
    println!("{len} bytes, {s_off} src offset, {d_off} dst offset");

    let lhs = AlignedSlice::new_zeroed(len, s_off);
    let mut rhs = AlignedSlice::new_zeroed(len, d_off);

    // Place the only difference at the very end so the result of the comparison
    // depends on the last byte.
    let last = len - 1;
    rhs[last] = 1;

    (len, lhs, rhs)
}
// Benchmark `memcmp` for a range of lengths in three alignment scenarios. Each case is
// built by `setup`, which returns `(len, b1, b2)` where the buffers differ only in their
// final byte.
#[library_benchmark]
#[benches::aligned(
    // Both buffers start on a page boundary.
    args = [
        Cfg { len: 16, s_off: 0, d_off: 0 },
        Cfg { len: 32, s_off: 0, d_off: 0 },
        Cfg { len: 64, s_off: 0, d_off: 0 },
        Cfg { len: 512, s_off: 0, d_off: 0 },
        Cfg { len: 4096, s_off: 0, d_off: 0 },
        Cfg { len: 1048576, s_off: 0, d_off: 0 },
    ],
    setup = setup
)]
#[benches::offset(
    // Both buffers share the same offset from page alignment.
    args = [
        Cfg { len: 16, s_off: 65, d_off: 65 },
        Cfg { len: 32, s_off: 65, d_off: 65 },
        Cfg { len: 64, s_off: 65, d_off: 65 },
        Cfg { len: 512, s_off: 65, d_off: 65 },
        Cfg { len: 4096, s_off: 65, d_off: 65 },
        Cfg { len: 1048576, s_off: 65, d_off: 65 },
    ],
    setup = setup
)]
#[benches::misaligned(
    // The buffers have different offsets from page alignment.
    args = [
        Cfg { len: 16, s_off: 65, d_off: 66 },
        Cfg { len: 32, s_off: 65, d_off: 66 },
        Cfg { len: 64, s_off: 65, d_off: 66 },
        Cfg { len: 512, s_off: 65, d_off: 66 },
        Cfg { len: 4096, s_off: 65, d_off: 66 },
        Cfg { len: 1048576, s_off: 65, d_off: 66 },
    ],
    setup = setup
)]
// `memcmp` only reads its inputs, so neither buffer needs to be mutable and both pointers
// are obtained with `as_ptr`.
fn bench((len, b1, b2): (usize, AlignedSlice, AlignedSlice)) {
    // SAFETY: `setup` creates both buffers with exactly `len` readable bytes.
    unsafe {
        black_box(memcmp(
            black_box(b1.as_ptr()),
            black_box(b2.as_ptr()),
            black_box(len),
        ));
    }
}
library_benchmark_group!(name = memcmp; benchmarks = bench);
}
mod mmove {
use super::*;
// Brought into scope so the benchmark argument lists can name the variants directly.
use Spread::{Large, Medium, Small};

/// Parameters for a single `memmove` benchmark case.
struct Cfg {
    /// Number of bytes to move.
    len: usize,
    /// How far apart the source and destination regions are; see [`Spread`].
    spread: Spread,
    /// Offset of the shared buffer from page alignment, in bytes.
    off: usize,
}

/// Symbolic distance between the `src` and `dst` regions of a `memmove` case.
enum Spread {
    /// `src` and `dst` are close.
    Small,
    /// `src` and `dst` are halfway offset in the buffer.
    Medium,
    /// `src` and `dst` only overlap by a single byte.
    Large,
}

/// Convert a symbolic [`Spread`] into a concrete byte distance for a move of `len` bytes:
/// one byte, half of `len`, or `len - 1` respectively.
fn calculate_spread(len: usize, spread: Spread) -> usize {
    match spread {
        Spread::Small => 1,
        Spread::Medium => len / 2,
        Spread::Large => len - 1,
    }
}
/// Prepare the input for one forward `memmove` case.
///
/// Prints the case parameters and returns `(len, spread, buf)`, where `buf` holds
/// `len + spread` bytes and its first `len` bytes (the region that gets moved from)
/// contain the wrapping byte pattern `1, 2, 3, ...`.
fn setup_forward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
    let len = cfg.len;
    let off = cfg.off;
    let spread = calculate_spread(len, cfg.spread);
    println!("{len} bytes, {spread} spread, {off} offset");
    assert!(spread < len, "otherwise this just tests memcpy");

    let mut buf = AlignedSlice::new_zeroed(len + spread, off);
    // Truncating cast is intended: the pattern simply wraps every 256 bytes.
    for (idx, byte) in buf[..len].iter_mut().enumerate() {
        *byte = (idx + 1) as u8;
    }

    (len, spread, buf)
}
/// Prepare the input for one backward `memmove` case.
///
/// Prints the case parameters and returns `(len, spread, buf)`, where `buf` holds
/// `len + spread` bytes and its last `len` bytes (the region that gets moved from)
/// contain the wrapping byte pattern `1, 2, 3, ...`.
fn setup_backward(cfg: Cfg) -> (usize, usize, AlignedSlice) {
    let len = cfg.len;
    let off = cfg.off;
    let spread = calculate_spread(len, cfg.spread);
    println!("{len} bytes, {spread} spread, {off} offset");
    assert!(spread < len, "otherwise this just tests memcpy");

    let mut buf = AlignedSlice::new_zeroed(len + spread, off);
    // Truncating cast is intended: the pattern simply wraps every 256 bytes.
    for (idx, byte) in buf[spread..].iter_mut().enumerate() {
        *byte = (idx + 1) as u8;
    }

    (len, spread, buf)
}
// Benchmark `memmove` where the destination lies `spread` bytes after the source within
// the same buffer. Each case is built by `setup_forward`, which returns
// `(len, spread, buf)` with `buf` holding `len + spread` bytes.
#[library_benchmark]
#[benches::small_spread(
// Regions one byte apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Small, off: 0 },
Cfg { len: 32, spread: Small, off: 0 },
Cfg { len: 64, spread: Small, off: 0 },
Cfg { len: 512, spread: Small, off: 0 },
Cfg { len: 4096, spread: Small, off: 0 },
Cfg { len: 1048576, spread: Small, off: 0 },
],
setup = setup_forward
)]
#[benches::medium_spread(
// Regions `len / 2` bytes apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Medium, off: 0 },
Cfg { len: 32, spread: Medium, off: 0 },
Cfg { len: 64, spread: Medium, off: 0 },
Cfg { len: 512, spread: Medium, off: 0 },
Cfg { len: 4096, spread: Medium, off: 0 },
Cfg { len: 1048576, spread: Medium, off: 0 },
],
setup = setup_forward
)]
#[benches::large_spread(
// Regions `len - 1` bytes apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Large, off: 0 },
Cfg { len: 32, spread: Large, off: 0 },
Cfg { len: 64, spread: Large, off: 0 },
Cfg { len: 512, spread: Large, off: 0 },
Cfg { len: 4096, spread: Large, off: 0 },
Cfg { len: 1048576, spread: Large, off: 0 },
],
setup = setup_forward
)]
#[benches::small_spread_offset(
// Regions one byte apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Small, off: 63 },
Cfg { len: 32, spread: Small, off: 63 },
Cfg { len: 64, spread: Small, off: 63 },
Cfg { len: 512, spread: Small, off: 63 },
Cfg { len: 4096, spread: Small, off: 63 },
Cfg { len: 1048576, spread: Small, off: 63 },
],
setup = setup_forward
)]
#[benches::medium_spread_offset(
// Regions `len / 2` bytes apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Medium, off: 63 },
Cfg { len: 32, spread: Medium, off: 63 },
Cfg { len: 64, spread: Medium, off: 63 },
Cfg { len: 512, spread: Medium, off: 63 },
Cfg { len: 4096, spread: Medium, off: 63 },
Cfg { len: 1048576, spread: Medium, off: 63 },
],
setup = setup_forward
)]
#[benches::large_spread_offset(
// Regions `len - 1` bytes apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Large, off: 63 },
Cfg { len: 32, spread: Large, off: 63 },
Cfg { len: 64, spread: Large, off: 63 },
Cfg { len: 512, spread: Large, off: 63 },
Cfg { len: 4096, spread: Large, off: 63 },
Cfg { len: 1048576, spread: Large, off: 63 },
],
setup = setup_forward
)]
fn forward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
// Test moving from the start of the buffer toward the end
// SAFETY: `setup_forward` creates `buf` with `len + spread` bytes, so both the source
// range `[0, len)` and the destination range `[spread, spread + len)` are in bounds.
unsafe {
black_box(memmove(
black_box(buf[spread..].as_mut_ptr()),
black_box(buf.as_ptr()),
black_box(len),
));
}
}
// Benchmark `memmove` where the destination lies `spread` bytes before the source within
// the same buffer. Each case is built by `setup_backward`, which returns
// `(len, spread, buf)` with `buf` holding `len + spread` bytes.
//
// NOTE(review): the group names below (`middle`, `*_off`) differ from the corresponding
// groups on `forward` (`medium_spread`, `*_offset`); consider unifying them.
#[library_benchmark]
#[benches::small_spread(
// Regions one byte apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Small, off: 0 },
Cfg { len: 32, spread: Small, off: 0 },
Cfg { len: 64, spread: Small, off: 0 },
Cfg { len: 512, spread: Small, off: 0 },
Cfg { len: 4096, spread: Small, off: 0 },
Cfg { len: 1048576, spread: Small, off: 0 },
],
setup = setup_backward
)]
#[benches::middle(
// Regions `len / 2` bytes apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Medium, off: 0 },
Cfg { len: 32, spread: Medium, off: 0 },
Cfg { len: 64, spread: Medium, off: 0 },
Cfg { len: 512, spread: Medium, off: 0 },
Cfg { len: 4096, spread: Medium, off: 0 },
Cfg { len: 1048576, spread: Medium, off: 0 },
],
setup = setup_backward
)]
#[benches::large_spread(
// Regions `len - 1` bytes apart, buffer page-aligned.
args = [
Cfg { len: 16, spread: Large, off: 0 },
Cfg { len: 32, spread: Large, off: 0 },
Cfg { len: 64, spread: Large, off: 0 },
Cfg { len: 512, spread: Large, off: 0 },
Cfg { len: 4096, spread: Large, off: 0 },
Cfg { len: 1048576, spread: Large, off: 0 },
],
setup = setup_backward
)]
#[benches::small_spread_off(
// Regions one byte apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Small, off: 63 },
Cfg { len: 32, spread: Small, off: 63 },
Cfg { len: 64, spread: Small, off: 63 },
Cfg { len: 512, spread: Small, off: 63 },
Cfg { len: 4096, spread: Small, off: 63 },
Cfg { len: 1048576, spread: Small, off: 63 },
],
setup = setup_backward
)]
#[benches::middle_off(
// Regions `len / 2` bytes apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Medium, off: 63 },
Cfg { len: 32, spread: Medium, off: 63 },
Cfg { len: 64, spread: Medium, off: 63 },
Cfg { len: 512, spread: Medium, off: 63 },
Cfg { len: 4096, spread: Medium, off: 63 },
Cfg { len: 1048576, spread: Medium, off: 63 },
],
setup = setup_backward
)]
#[benches::large_spread_off(
// Regions `len - 1` bytes apart, buffer 63 bytes past a page boundary.
args = [
Cfg { len: 16, spread: Large, off: 63 },
Cfg { len: 32, spread: Large, off: 63 },
Cfg { len: 64, spread: Large, off: 63 },
Cfg { len: 512, spread: Large, off: 63 },
Cfg { len: 4096, spread: Large, off: 63 },
Cfg { len: 1048576, spread: Large, off: 63 },
],
setup = setup_backward
)]
fn backward((len, spread, mut buf): (usize, usize, AlignedSlice)) {
// Test moving from the end of the buffer toward the start
// SAFETY: `setup_backward` creates `buf` with `len + spread` bytes, so both the source
// range `[spread, spread + len)` and the destination range `[0, len)` are in bounds.
unsafe {
black_box(memmove(
black_box(buf.as_mut_ptr()),
black_box(buf[spread..].as_ptr()),
black_box(len),
));
}
}
library_benchmark_group!(name = memmove; benchmarks = forward, backward);
}
use mcmp::memcmp;
use mcpy::memcpy;
use mmove::memmove;
use mset::memset;
main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);