Merge pull request #467 from Demindiro/memcmp-x86_64
This commit is contained in:
commit
b6107b307d
6 changed files with 182 additions and 20 deletions
|
|
@ -6,6 +6,7 @@
|
|||
#![feature(compiler_builtins)]
|
||||
#![feature(core_ffi_c)]
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(inline_const)]
|
||||
#![feature(lang_items)]
|
||||
#![feature(linkage)]
|
||||
#![feature(naked_functions)]
|
||||
|
|
|
|||
|
|
@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
|
|||
}
|
||||
set_bytes_bytes(s, c, n);
|
||||
}
|
||||
|
||||
/// Compares the first `n` bytes at `s1` and `s2`, one byte at a time.
///
/// Returns `0` when all `n` bytes match. Otherwise returns the difference
/// between the first pair of differing bytes (`s1` byte minus `s2` byte,
/// both widened to `i32`), so the sign of the result gives the ordering.
///
/// # Safety
///
/// `s1` and `s2` must each be valid for reads of `n` bytes.
#[inline(always)]
pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
    let mut i = 0;
    while i < n {
        let a = *s1.add(i);
        let b = *s2.add(i);
        if a != b {
            // Widen losslessly before subtracting so the difference of two
            // `u8` values (range -255..=255) is computed without wrapping.
            return i32::from(a) - i32::from(b);
        }
        i += 1;
    }
    0
}
|
||||
|
|
|
|||
|
|
@ -51,16 +51,7 @@ intrinsics! {
|
|||
#[mem_builtin]
|
||||
#[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")]
|
||||
pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 {
|
||||
let mut i = 0;
|
||||
while i < n {
|
||||
let a = *s1.add(i);
|
||||
let b = *s2.add(i);
|
||||
if a != b {
|
||||
return a as i32 - b as i32;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
0
|
||||
impls::compare_bytes(s1, s2, n)
|
||||
}
|
||||
|
||||
#[mem_builtin]
|
||||
|
|
|
|||
|
|
@ -16,6 +16,9 @@
|
|||
// feature is present at compile-time. We don't bother detecting other features.
|
||||
// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb".
|
||||
|
||||
use core::intrinsics;
|
||||
use core::mem;
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_feature = "ermsb")]
|
||||
pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) {
|
||||
|
|
@ -98,3 +101,47 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) {
|
|||
options(att_syntax, nostack, preserves_flags)
|
||||
);
|
||||
}
|
||||
|
||||
/// Compares `n` bytes at `a` and `b`.
///
/// Returns `0` if all `n` bytes are equal; otherwise returns the difference
/// between the first pair of differing bytes, each widened to `i32`.
///
/// The comparison is a cascade of element widths: 32 bytes (`[u128; 2]`),
/// then 16, 8, 4, 2 and finally 1. Each level walks whole elements of its
/// width and hands both a mismatching element and the leftover tail to the
/// next narrower level.
///
/// # Safety
///
/// Both pointers are read for `n` bytes, so each must be valid for reads of
/// `n` bytes. No alignment is required (wide loads use `read_unaligned`).
#[inline(always)]
|
||||
pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
|
||||
/// One level of the cascade, comparing elements of type `T`.
///
/// `n` is a byte count. `U` is the element type of the next narrower
/// level, and `f` is that level's comparison routine; it receives the
/// pointers cast to `*const U` plus a byte count.
#[inline(always)]
|
||||
unsafe fn cmp<T, U, F>(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32
|
||||
where
|
||||
T: Clone + Copy + Eq,
|
||||
U: Clone + Copy + Eq,
|
||||
F: FnOnce(*const U, *const U, usize) -> i32,
|
||||
{
|
||||
// Ensure T is not a ZST.
// (This also makes the divisor used below provably non-zero.)
|
||||
const { assert!(mem::size_of::<T>() != 0) };
|
||||

||||
// One past the last whole `T` element that fits in `n` bytes.
let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>()));
|
||||
while a != end {
|
||||
// Unaligned loads: the pointers are not required to be aligned to `T`.
if a.read_unaligned() != b.read_unaligned() {
|
||||
// The elements differ: let the narrower level find the differing byte
// inside just this one element.
return f(a.cast(), b.cast(), mem::size_of::<T>());
|
||||
}
|
||||
a = a.add(1);
|
||||
b = b.add(1);
|
||||
}
|
||||
// All whole elements matched; delegate the remaining `n % size_of::<T>()`
// tail bytes to the narrower level.
f(
|
||||
a.cast(),
|
||||
b.cast(),
|
||||
intrinsics::unchecked_rem(n, mem::size_of::<T>()),
|
||||
)
|
||||
}
|
||||
// Base case: byte-by-byte scan that produces the final `i32` result.
let c1 = |mut a: *const u8, mut b: *const u8, n| {
|
||||
for _ in 0..n {
|
||||
if a.read() != b.read() {
|
||||
return i32::from(a.read()) - i32::from(b.read());
|
||||
}
|
||||
a = a.add(1);
|
||||
b = b.add(1);
|
||||
}
|
||||
0
|
||||
};
|
||||
// Each closure compares at its own width and falls back to the previous
// (narrower) closure: u16 -> u8, u32 -> u16, u64 -> u32, u128 -> u64,
// [u128; 2] -> u128.
let c2 = |a: *const u16, b, n| cmp(a, b, n, c1);
|
||||
let c4 = |a: *const u32, b, n| cmp(a, b, n, c2);
|
||||
let c8 = |a: *const u64, b, n| cmp(a, b, n, c4);
|
||||
let c16 = |a: *const u128, b, n| cmp(a, b, n, c8);
|
||||
let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16);
|
||||
// Start at the widest level (32-byte elements).
c32(a.cast(), b.cast(), n)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,6 +96,18 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) {
|
|||
})
|
||||
}
|
||||
|
||||
// Bench helper: times slice `cmp` on two buffers where the second slice
// starts one element into its buffer.
// NOTE(review): presumably `AlignedVec::new(0, n)` yields `n` zero bytes with
// an aligned start, making `&v2[1..]` misaligned relative to `v1` - confirm
// against `AlignedVec`.
fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
// Change only the last element of `v2`, so a difference (if the buffers
// are otherwise identical) appears only at the very end.
v2[n - 1] = 1;
|
||||
// Byte count reported to the bench harness for this run.
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
// `black_box` keeps the optimizer from seeing through the slices.
let s1: &[u8] = black_box(&v1[0..]);
|
||||
// Skips element 0, so this slice is one element shorter than `s1`.
let s2: &[u8] = black_box(&v2[1..]);
|
||||
s1.cmp(s2)
|
||||
})
|
||||
}
|
||||
|
||||
fn memcmp_rust(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
|
|
@ -108,6 +120,18 @@ fn memcmp_rust(b: &mut Bencher, n: usize) {
|
|||
})
|
||||
}
|
||||
|
||||
// Bench helper: times a direct `memcmp` call on two buffers where the second
// pointer starts one element into its buffer.
// NOTE(review): presumably `AlignedVec::new(0, n)` yields `n` zero bytes with
// an aligned start and `memcmp` is this crate's implementation - confirm
// against the file's imports.
fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) {
|
||||
let v1 = AlignedVec::new(0, n);
|
||||
let mut v2 = AlignedVec::new(0, n);
|
||||
// Change only the last element of `v2`, so a difference (if the buffers
// are otherwise identical) appears only at the very end.
v2[n - 1] = 1;
|
||||
// Byte count reported to the bench harness for this run.
b.bytes = n as u64;
|
||||
b.iter(|| {
|
||||
// `black_box` keeps the optimizer from seeing through the slices.
let s1: &[u8] = black_box(&v1[0..]);
|
||||
// Skips element 0, leaving `n - 1` elements.
let s2: &[u8] = black_box(&v2[1..]);
|
||||
// Compare `n - 1` bytes: the length of the shorter slice `s2`.
unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) }
|
||||
})
|
||||
}
|
||||
|
||||
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
|
||||
let mut v = AlignedVec::new(0, n + n / 2 + offset);
|
||||
b.bytes = n as u64;
|
||||
|
|
@ -209,6 +233,38 @@ fn memset_rust_1048576_offset(b: &mut Bencher) {
|
|||
memset_rust(b, 1048576, 65)
|
||||
}
|
||||
|
||||
// Bench entry point: runs the `memcmp_builtin` helper with a size of 8.
#[bench]
|
||||
fn memcmp_builtin_8(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 8)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_rust` helper with a size of 8.
#[bench]
|
||||
fn memcmp_rust_8(b: &mut Bencher) {
|
||||
memcmp_rust(b, 8)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_builtin` helper with a size of 16.
#[bench]
|
||||
fn memcmp_builtin_16(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 16)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_rust` helper with a size of 16.
#[bench]
|
||||
fn memcmp_rust_16(b: &mut Bencher) {
|
||||
memcmp_rust(b, 16)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_builtin` helper with a size of 32.
#[bench]
|
||||
fn memcmp_builtin_32(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 32)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_rust` helper with a size of 32.
#[bench]
|
||||
fn memcmp_rust_32(b: &mut Bencher) {
|
||||
memcmp_rust(b, 32)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_builtin` helper with a size of 64.
#[bench]
|
||||
fn memcmp_builtin_64(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 64)
|
||||
}
|
||||
// Bench entry point: runs the `memcmp_rust` helper with a size of 64.
#[bench]
|
||||
fn memcmp_rust_64(b: &mut Bencher) {
|
||||
memcmp_rust(b, 64)
|
||||
}
|
||||
#[bench]
|
||||
fn memcmp_builtin_4096(b: &mut Bencher) {
|
||||
memcmp_builtin(b, 4096)
|
||||
|
|
@ -225,6 +281,54 @@ fn memcmp_builtin_1048576(b: &mut Bencher) {
|
|||
// Bench entry point: runs the `memcmp_rust` helper with a size of 1048576 (1 MiB).
fn memcmp_rust_1048576(b: &mut Bencher) {
|
||||
memcmp_rust(b, 1048576)
|
||||
}
|
||||
// Passes 8, not 7: `memcmp_builtin_unaligned` compares against a slice that
// skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_7(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 8)
|
||||
}
|
||||
// Passes 8, not 7: `memcmp_rust_unaligned` calls `memcmp` with `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_7(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 8)
|
||||
}
|
||||
// Passes 16, not 15: `memcmp_builtin_unaligned` compares against a slice that
// skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_15(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 16)
|
||||
}
|
||||
// Passes 16, not 15: `memcmp_rust_unaligned` calls `memcmp` with `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_15(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 16)
|
||||
}
|
||||
// Passes 32, not 31: `memcmp_builtin_unaligned` compares against a slice that
// skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_31(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 32)
|
||||
}
|
||||
// Passes 32, not 31: `memcmp_rust_unaligned` calls `memcmp` with `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_31(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 32)
|
||||
}
|
||||
// Passes 64, not 63: `memcmp_builtin_unaligned` compares against a slice that
// skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_63(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 64)
|
||||
}
|
||||
// Passes 64, not 63: `memcmp_rust_unaligned` calls `memcmp` with `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_63(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 64)
|
||||
}
|
||||
// Passes 4096, not 4095: `memcmp_builtin_unaligned` compares against a slice
// that skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_4095(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 4096)
|
||||
}
|
||||
// Passes 4096, not 4095: `memcmp_rust_unaligned` calls `memcmp` with `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_4095(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 4096)
|
||||
}
|
||||
// Passes 1048576, not 1048575: `memcmp_builtin_unaligned` compares against a
// slice that skips the first element, so one element fewer than `n` is compared.
#[bench]
|
||||
fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) {
|
||||
memcmp_builtin_unaligned(b, 1048576)
|
||||
}
|
||||
// Passes 1048576, not 1048575: `memcmp_rust_unaligned` calls `memcmp` with
// `n - 1` bytes.
#[bench]
|
||||
fn memcmp_rust_unaligned_1048575(b: &mut Bencher) {
|
||||
memcmp_rust_unaligned(b, 1048576)
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn memmove_builtin_4096(b: &mut Bencher) {
|
||||
|
|
|
|||
|
|
@ -116,21 +116,26 @@ fn memset_nonzero() {
|
|||
|
||||
#[test]
|
||||
fn memcmp_eq() {
|
||||
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
|
||||
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
|
||||
unsafe {
|
||||
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0);
|
||||
assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0);
|
||||
let arr1 @ arr2 = gen_arr::<256>();
|
||||
for i in 0..256 {
|
||||
unsafe {
|
||||
assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0);
|
||||
assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn memcmp_ne() {
|
||||
let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
|
||||
let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7];
|
||||
unsafe {
|
||||
assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0);
|
||||
assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0);
|
||||
let arr1 @ arr2 = gen_arr::<256>();
|
||||
for i in 0..256 {
|
||||
let mut diff_arr = arr1;
|
||||
diff_arr.0[i] = 127;
|
||||
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
|
||||
for k in i + 1..256 {
|
||||
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
|
||||
assert_eq!(expect, result.cmp(&0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue