Merge pull request #467 from Demindiro/memcmp-x86_64

This commit is contained in:
Amanieu d'Antras 2022-05-31 18:18:47 +02:00 committed by GitHub
commit b6107b307d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 182 additions and 20 deletions

View file

@ -6,6 +6,7 @@
#![feature(compiler_builtins)]
#![feature(core_ffi_c)]
#![feature(core_intrinsics)]
#![feature(inline_const)]
#![feature(lang_items)]
#![feature(linkage)]
#![feature(naked_functions)]

View file

@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
}
set_bytes_bytes(s, c, n);
}
/// Compares the first `n` bytes behind `s1` and `s2`, one byte at a time.
///
/// Returns `0` when all `n` byte pairs are equal. Otherwise returns the first
/// mismatching byte of `s1` minus the corresponding byte of `s2`, with both
/// bytes widened to `i32` before the subtraction.
///
/// # Safety
///
/// Up to `n` bytes are read through each pointer, so both pointers must be
/// valid for reads of `n` bytes.
#[inline(always)]
pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
    for offset in 0..n {
        let lhs = s1.add(offset).read();
        let rhs = s2.add(offset).read();
        if lhs != rhs {
            // Widening first keeps the difference within `-255..=255`.
            return i32::from(lhs) - i32::from(rhs);
        }
    }
    0
}

View file

@ -51,16 +51,7 @@ intrinsics! {
// `memcmp` entry point: takes two byte pointers and a length and returns an `i32`.
//
// NOTE(review): this listing is a flattened diff hunk (`-51,16 +51,7`), so the body below
// shows two alternatives back to back. The byte-by-byte loop ending in `0` appears to be
// the removed pre-change body, and the final `impls::compare_bytes` call appears to be
// its replacement. Confirm against the repository before treating this as compilable.
#[mem_builtin]
#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
// Walks both buffers in lockstep and returns the difference of the first pair of
// bytes that differ, or falls through to `0` when all `n` bytes match.
let mut i = 0;
while i < n {
let a = *s1.add(i);
let b = *s2.add(i);
if a != b {
return a as i32 - b as i32;
}
i += 1;
}
0
// Delegates the whole comparison to `impls::compare_bytes` with the same arguments.
impls::compare_bytes(s1, s2, n)
}
#[mem_builtin]

View file

@ -16,6 +16,9 @@
// feature is present at compile-time. We don't bother detecting other features.
// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb".
use core::intrinsics;
use core::mem;
#[inline(always)]
#[cfg(target_feature = "ermsb")]
pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
@ -98,3 +101,47 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
options(att_syntax, nostack, preserves_flags)
);
}
/// Compares `n` bytes starting at `a` and `b`.
///
/// Returns `0` when every byte matches. Otherwise returns the first mismatching
/// byte of `a` minus the corresponding byte of `b`, both widened to `i32`.
///
/// The buffers are scanned in 32-byte chunks using unaligned reads. A chunk that
/// differs, and the tail that does not fill a whole chunk, are handed to the next
/// narrower comparison (16, 8, 4, 2 and finally 1 byte wide).
///
/// # Safety
///
/// Bytes within the first `n` bytes of each buffer are read, so both pointers
/// must be valid for reads of `n` bytes.
#[inline(always)]
pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
    /// Compares `n` bytes in `T`-sized steps and defers to `narrower` (which works
    /// on elements of type `U`) for a mismatching chunk or for the leftover bytes.
    #[inline(always)]
    unsafe fn cmp<T, U, F>(mut lhs: *const T, mut rhs: *const T, n: usize, narrower: F) -> i32
    where
        T: Clone + Copy + Eq,
        U: Clone + Copy + Eq,
        F: FnOnce(*const U, *const U, usize) -> i32,
    {
        // A zero-sized `T` would make the chunk arithmetic below meaningless, so
        // reject it at compile time.
        const { assert!(mem::size_of::<T>() != 0) };

        let width = mem::size_of::<T>();
        // One past the last whole `T`-sized chunk that fits in `n` bytes.
        let stop = lhs.add(intrinsics::unchecked_div(n, width));
        while lhs != stop {
            let differs = lhs.read_unaligned() != rhs.read_unaligned();
            if differs {
                // The difference lies somewhere in this chunk; let the narrower
                // comparison locate it.
                return narrower(lhs.cast(), rhs.cast(), width);
            }
            lhs = lhs.add(1);
            rhs = rhs.add(1);
        }

        // Bytes that did not fill a whole chunk.
        let leftover = intrinsics::unchecked_rem(n, width);
        narrower(lhs.cast(), rhs.cast(), leftover)
    }

    // Narrowest comparison: produces the actual return value from the first
    // differing byte pair.
    let by_byte = |mut lhs: *const u8, mut rhs: *const u8, len| {
        for _ in 0..len {
            let (x, y) = (lhs.read(), rhs.read());
            if x != y {
                return i32::from(x) - i32::from(y);
            }
            lhs = lhs.add(1);
            rhs = rhs.add(1);
        }
        0
    };
    // Each wider comparison falls back to the next narrower one.
    let by_u16 = |p: *const u16, q, len| cmp(p, q, len, by_byte);
    let by_u32 = |p: *const u32, q, len| cmp(p, q, len, by_u16);
    let by_u64 = |p: *const u64, q, len| cmp(p, q, len, by_u32);
    let by_u128 = |p: *const u128, q, len| cmp(p, q, len, by_u64);
    let by_u128_pair = |p: *const [u128; 2], q, len| cmp(p, q, len, by_u128);
    by_u128_pair(a.cast(), b.cast(), n)
}

View file

@ -96,6 +96,18 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) {
})
}
/// Benchmarks slice comparison (`<[u8]>::cmp`) when the second operand starts one
/// byte into its buffer.
///
/// `n` is the length of both backing buffers and must be non-zero, because
/// `n - 1` is used as an index. The second slice is `&v2[1..]`, so at most
/// `n - 1` bytes can be compared per iteration.
///
/// NOTE(review): presumably `AlignedVec::new(0, n)` yields `n` zero bytes at an
/// aligned address, which would make `&v2[1..]` misaligned — confirm against its
/// definition.
fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    // Change the final byte of the second buffer before the comparison runs.
    v2[n - 1] = 1;
    // The shifted slice holds only `n - 1` bytes, so that is the amount of data
    // each iteration processes; reporting `n` would overstate the throughput.
    b.bytes = (n - 1) as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1[0..]);
        let s2: &[u8] = black_box(&v2[1..]);
        s1.cmp(s2)
    })
}
fn memcmp_rust(b: &mut Bencher, n: usize) {
let v1 = AlignedVec::new(0, n);
let mut v2 = AlignedVec::new(0, n);
@ -108,6 +120,18 @@ fn memcmp_rust(b: &mut Bencher, n: usize) {
})
}
/// Benchmarks a direct `memcmp` call when the second operand starts one byte into
/// its buffer.
///
/// `n` is the length of both backing buffers and must be non-zero, because
/// `n - 1` is used both as an index and as the number of bytes passed to
/// `memcmp`.
///
/// NOTE(review): presumably `AlignedVec::new(0, n)` yields `n` zero bytes at an
/// aligned address, which would make `&v2[1..]` misaligned — confirm against its
/// definition.
fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) {
    let v1 = AlignedVec::new(0, n);
    let mut v2 = AlignedVec::new(0, n);
    // Change the final byte of the second buffer before the comparison runs.
    v2[n - 1] = 1;
    // `memcmp` is asked to compare `n - 1` bytes, so that is the amount of data
    // each iteration processes; reporting `n` would overstate the throughput.
    b.bytes = (n - 1) as u64;
    b.iter(|| {
        let s1: &[u8] = black_box(&v1[0..]);
        let s2: &[u8] = black_box(&v2[1..]);
        unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) }
    })
}
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
let mut v = AlignedVec::new(0, n + n / 2 + offset);
b.bytes = n as u64;
@ -209,6 +233,38 @@ fn memset_rust_1048576_offset(b: &mut Bencher) {
memset_rust(b, 1048576, 65)
}
// Small-buffer `memcmp` benchmarks. Each entry forwards a fixed size (8, 16, 32 or
// 64) to one of the shared `memcmp_builtin` / `memcmp_rust` helpers, so every size
// is measured once per helper.

// 8-byte case.
#[bench]
fn memcmp_builtin_8(b: &mut Bencher) {
memcmp_builtin(b, 8)
}
#[bench]
fn memcmp_rust_8(b: &mut Bencher) {
memcmp_rust(b, 8)
}
// 16-byte case.
#[bench]
fn memcmp_builtin_16(b: &mut Bencher) {
memcmp_builtin(b, 16)
}
#[bench]
fn memcmp_rust_16(b: &mut Bencher) {
memcmp_rust(b, 16)
}
// 32-byte case.
#[bench]
fn memcmp_builtin_32(b: &mut Bencher) {
memcmp_builtin(b, 32)
}
#[bench]
fn memcmp_rust_32(b: &mut Bencher) {
memcmp_rust(b, 32)
}
// 64-byte case.
#[bench]
fn memcmp_builtin_64(b: &mut Bencher) {
memcmp_builtin(b, 64)
}
#[bench]
fn memcmp_rust_64(b: &mut Bencher) {
memcmp_rust(b, 64)
}
#[bench]
fn memcmp_builtin_4096(b: &mut Bencher) {
memcmp_builtin(b, 4096)
@ -225,6 +281,54 @@ fn memcmp_builtin_1048576(b: &mut Bencher) {
fn memcmp_rust_1048576(b: &mut Bencher) {
memcmp_rust(b, 1048576)
}
// Unaligned `memcmp` benchmarks. Each entry forwards a buffer length to
// `memcmp_builtin_unaligned` or `memcmp_rust_unaligned`.
//
// The numeric suffix in each name is one less than the length passed in: the
// helpers take the second operand from `&v2[1..]`, which leaves `n - 1` bytes to
// compare. For example, `_7` passes 8.

// 7 bytes compared (buffer length 8).
#[bench]
fn memcmp_builtin_unaligned_7(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 8)
}
#[bench]
fn memcmp_rust_unaligned_7(b: &mut Bencher) {
memcmp_rust_unaligned(b, 8)
}
// 15 bytes compared (buffer length 16).
#[bench]
fn memcmp_builtin_unaligned_15(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 16)
}
#[bench]
fn memcmp_rust_unaligned_15(b: &mut Bencher) {
memcmp_rust_unaligned(b, 16)
}
// 31 bytes compared (buffer length 32).
#[bench]
fn memcmp_builtin_unaligned_31(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 32)
}
#[bench]
fn memcmp_rust_unaligned_31(b: &mut Bencher) {
memcmp_rust_unaligned(b, 32)
}
// 63 bytes compared (buffer length 64).
#[bench]
fn memcmp_builtin_unaligned_63(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 64)
}
#[bench]
fn memcmp_rust_unaligned_63(b: &mut Bencher) {
memcmp_rust_unaligned(b, 64)
}
// 4095 bytes compared (buffer length 4096).
#[bench]
fn memcmp_builtin_unaligned_4095(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 4096)
}
#[bench]
fn memcmp_rust_unaligned_4095(b: &mut Bencher) {
memcmp_rust_unaligned(b, 4096)
}
// 1048575 bytes compared (buffer length 1048576).
#[bench]
fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) {
memcmp_builtin_unaligned(b, 1048576)
}
#[bench]
fn memcmp_rust_unaligned_1048575(b: &mut Bencher) {
memcmp_rust_unaligned(b, 1048576)
}
#[bench]
fn memmove_builtin_4096(b: &mut Bencher) {

View file

@ -116,21 +116,26 @@ fn memset_nonzero() {
// Checks that `memcmp` returns 0 for buffers with equal contents.
//
// NOTE(review): this listing is a flattened diff hunk (`-116,21 +116,26`), so two
// bodies are interleaved: `arr1` is bound twice and the braces do not balance as
// shown. The fixed 8-element arrays with their two assertions appear to be the
// removed version, and the `gen_arr::<256>()` loop appears to be its replacement.
// Confirm against the repository.
#[test]
fn memcmp_eq() {
// Two separate 8-byte arrays with identical contents, compared at lengths 8 and 3.
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
unsafe {
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0);
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0);
// `arr1 @ arr2` binds the single value returned by `gen_arr` to both names, so
// the two operands hold the same contents.
let arr1 @ arr2 = gen_arr::<256>();
// Every length from 0 to 255 is checked, with the operands in both orders.
for i in 0..256 {
unsafe {
assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0);
assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0);
}
}
}
// Checks the sign of `memcmp`'s result when the buffers differ.
//
// NOTE(review): this listing is a flattened diff hunk, so two bodies are
// interleaved: `arr1` is bound twice and the braces do not balance as shown. The
// fixed 8-element arrays with their two assertions appear to be the removed
// version, and the `gen_arr::<256>()` loops appear to be its replacement. Confirm
// against the repository.
#[test]
fn memcmp_ne() {
// 8-byte arrays that differ only at index 6 (6 versus 7); the result must be
// negative one way round and positive the other.
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7];
unsafe {
assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0);
assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0);
// `arr1 @ arr2` binds the single value returned by `gen_arr` to both names.
let arr1 @ arr2 = gen_arr::<256>();
for i in 0..256 {
// Work on a copy in which only position `i` is overwritten (with 127).
let mut diff_arr = arr1;
diff_arr.0[i] = 127;
// Expected ordering of the overwritten byte against the untouched byte at the
// same position.
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
// Every length that includes position `i` must report that ordering through
// the sign of the result.
for k in i + 1..256 {
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
assert_eq!(expect, result.cmp(&0));
}
}
}