diff --git a/library/compiler-builtins/src/lib.rs b/library/compiler-builtins/src/lib.rs index 009923d27e5b..acac040be332 100644 --- a/library/compiler-builtins/src/lib.rs +++ b/library/compiler-builtins/src/lib.rs @@ -6,6 +6,7 @@ #![feature(compiler_builtins)] #![feature(core_ffi_c)] #![feature(core_intrinsics)] +#![feature(inline_const)] #![feature(lang_items)] #![feature(linkage)] #![feature(naked_functions)] diff --git a/library/compiler-builtins/src/mem/impls.rs b/library/compiler-builtins/src/mem/impls.rs index 8151324254a0..72003a5c472b 100644 --- a/library/compiler-builtins/src/mem/impls.rs +++ b/library/compiler-builtins/src/mem/impls.rs @@ -265,3 +265,17 @@ pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) { } set_bytes_bytes(s, c, n); } + +#[inline(always)] +pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 { + let mut i = 0; + while i < n { + let a = *s1.add(i); + let b = *s2.add(i); + if a != b { + return a as i32 - b as i32; + } + i += 1; + } + 0 +} diff --git a/library/compiler-builtins/src/mem/mod.rs b/library/compiler-builtins/src/mem/mod.rs index a551138612bb..c5b0ddc16ec3 100644 --- a/library/compiler-builtins/src/mem/mod.rs +++ b/library/compiler-builtins/src/mem/mod.rs @@ -51,16 +51,7 @@ intrinsics! { #[mem_builtin] #[cfg_attr(not(all(target_os = "windows", target_env = "gnu")), linkage = "weak")] pub unsafe extern "C" fn memcmp(s1: *const u8, s2: *const u8, n: usize) -> i32 { - let mut i = 0; - while i < n { - let a = *s1.add(i); - let b = *s2.add(i); - if a != b { - return a as i32 - b as i32; - } - i += 1; - } - 0 + impls::compare_bytes(s1, s2, n) } #[mem_builtin] diff --git a/library/compiler-builtins/src/mem/x86_64.rs b/library/compiler-builtins/src/mem/x86_64.rs index a7ab6f605bdc..4d2f6e5ee329 100644 --- a/library/compiler-builtins/src/mem/x86_64.rs +++ b/library/compiler-builtins/src/mem/x86_64.rs @@ -16,6 +16,9 @@ // feature is present at compile-time. We don't bother detecting other features. 
// Note that ERMSB does not enhance the backwards (DF=1) "rep movsb". +use core::intrinsics; +use core::mem; + #[inline(always)] #[cfg(target_feature = "ermsb")] pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, count: usize) { @@ -98,3 +101,47 @@ pub unsafe fn set_bytes(dest: *mut u8, c: u8, count: usize) { options(att_syntax, nostack, preserves_flags) ); } + +#[inline(always)] +pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 { + #[inline(always)] + unsafe fn cmp<T, U, F>(mut a: *const T, mut b: *const T, n: usize, f: F) -> i32 + where + T: Clone + Copy + Eq, + U: Clone + Copy + Eq, + F: FnOnce(*const U, *const U, usize) -> i32, + { + // Ensure T is not a ZST. + const { assert!(mem::size_of::<T>() != 0) }; + + let end = a.add(intrinsics::unchecked_div(n, mem::size_of::<T>())); + while a != end { + if a.read_unaligned() != b.read_unaligned() { + return f(a.cast(), b.cast(), mem::size_of::<T>()); + } + a = a.add(1); + b = b.add(1); + } + f( + a.cast(), + b.cast(), + intrinsics::unchecked_rem(n, mem::size_of::<T>()), + ) + } + let c1 = |mut a: *const u8, mut b: *const u8, n| { + for _ in 0..n { + if a.read() != b.read() { + return i32::from(a.read()) - i32::from(b.read()); + } + a = a.add(1); + b = b.add(1); + } + 0 + }; + let c2 = |a: *const u16, b, n| cmp(a, b, n, c1); + let c4 = |a: *const u32, b, n| cmp(a, b, n, c2); + let c8 = |a: *const u64, b, n| cmp(a, b, n, c4); + let c16 = |a: *const u128, b, n| cmp(a, b, n, c8); + let c32 = |a: *const [u128; 2], b, n| cmp(a, b, n, c16); + c32(a.cast(), b.cast(), n) +} diff --git a/library/compiler-builtins/testcrate/benches/mem.rs b/library/compiler-builtins/testcrate/benches/mem.rs index b6883a93b248..98a040958c4d 100644 --- a/library/compiler-builtins/testcrate/benches/mem.rs +++ b/library/compiler-builtins/testcrate/benches/mem.rs @@ -96,6 +96,18 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) { }) } +fn memcmp_builtin_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = 
AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + s1.cmp(s2) + }) +} + fn memcmp_rust(b: &mut Bencher, n: usize) { let v1 = AlignedVec::new(0, n); let mut v2 = AlignedVec::new(0, n); @@ -108,6 +120,18 @@ fn memcmp_rust(b: &mut Bencher, n: usize) { }) } +fn memcmp_rust_unaligned(b: &mut Bencher, n: usize) { + let v1 = AlignedVec::new(0, n); + let mut v2 = AlignedVec::new(0, n); + v2[n - 1] = 1; + b.bytes = n as u64; + b.iter(|| { + let s1: &[u8] = black_box(&v1[0..]); + let s2: &[u8] = black_box(&v2[1..]); + unsafe { memcmp(s1.as_ptr(), s2.as_ptr(), n - 1) } + }) +} + fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) { let mut v = AlignedVec::new(0, n + n / 2 + offset); b.bytes = n as u64; @@ -209,6 +233,38 @@ fn memset_rust_1048576_offset(b: &mut Bencher) { memset_rust(b, 1048576, 65) } +#[bench] +fn memcmp_builtin_8(b: &mut Bencher) { + memcmp_builtin(b, 8) +} +#[bench] +fn memcmp_rust_8(b: &mut Bencher) { + memcmp_rust(b, 8) +} +#[bench] +fn memcmp_builtin_16(b: &mut Bencher) { + memcmp_builtin(b, 16) +} +#[bench] +fn memcmp_rust_16(b: &mut Bencher) { + memcmp_rust(b, 16) +} +#[bench] +fn memcmp_builtin_32(b: &mut Bencher) { + memcmp_builtin(b, 32) +} +#[bench] +fn memcmp_rust_32(b: &mut Bencher) { + memcmp_rust(b, 32) +} +#[bench] +fn memcmp_builtin_64(b: &mut Bencher) { + memcmp_builtin(b, 64) +} +#[bench] +fn memcmp_rust_64(b: &mut Bencher) { + memcmp_rust(b, 64) +} #[bench] fn memcmp_builtin_4096(b: &mut Bencher) { memcmp_builtin(b, 4096) @@ -225,6 +281,54 @@ fn memcmp_builtin_1048576(b: &mut Bencher) { fn memcmp_rust_1048576(b: &mut Bencher) { memcmp_rust(b, 1048576) } +#[bench] +fn memcmp_builtin_unaligned_7(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 8) +} +#[bench] +fn memcmp_rust_unaligned_7(b: &mut Bencher) { + memcmp_rust_unaligned(b, 8) +} +#[bench] +fn memcmp_builtin_unaligned_15(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 
16) +} +#[bench] +fn memcmp_rust_unaligned_15(b: &mut Bencher) { + memcmp_rust_unaligned(b, 16) +} +#[bench] +fn memcmp_builtin_unaligned_31(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 32) +} +#[bench] +fn memcmp_rust_unaligned_31(b: &mut Bencher) { + memcmp_rust_unaligned(b, 32) +} +#[bench] +fn memcmp_builtin_unaligned_63(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 64) +} +#[bench] +fn memcmp_rust_unaligned_63(b: &mut Bencher) { + memcmp_rust_unaligned(b, 64) +} +#[bench] +fn memcmp_builtin_unaligned_4095(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 4096) +} +#[bench] +fn memcmp_rust_unaligned_4095(b: &mut Bencher) { + memcmp_rust_unaligned(b, 4096) +} +#[bench] +fn memcmp_builtin_unaligned_1048575(b: &mut Bencher) { + memcmp_builtin_unaligned(b, 1048576) +} +#[bench] +fn memcmp_rust_unaligned_1048575(b: &mut Bencher) { + memcmp_rust_unaligned(b, 1048576) +} #[bench] fn memmove_builtin_4096(b: &mut Bencher) { diff --git a/library/compiler-builtins/testcrate/tests/mem.rs b/library/compiler-builtins/testcrate/tests/mem.rs index 3f20e72a04c8..48ac95adc17f 100644 --- a/library/compiler-builtins/testcrate/tests/mem.rs +++ b/library/compiler-builtins/testcrate/tests/mem.rs @@ -116,21 +116,26 @@ fn memset_nonzero() { #[test] fn memcmp_eq() { - let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - unsafe { - assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8), 0); - assert_eq!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 3), 0); + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { + unsafe { + assert_eq!(memcmp(arr1.0.as_ptr(), arr2.0.as_ptr(), i), 0); + assert_eq!(memcmp(arr2.0.as_ptr(), arr1.0.as_ptr(), i), 0); + } } } #[test] fn memcmp_ne() { - let arr1: [u8; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; - let arr2: [u8; 8] = [0, 1, 2, 3, 4, 5, 7, 7]; - unsafe { - assert!(memcmp(arr1.as_ptr(), arr2.as_ptr(), 8) < 0); - assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0); + let arr1 @ arr2 = gen_arr::<256>(); + for i in 0..256 { + 
let mut diff_arr = arr1; + diff_arr.0[i] = 127; + let expect = diff_arr.0[i].cmp(&arr2.0[i]); + for k in i + 1..256 { + let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) }; + assert_eq!(expect, result.cmp(&0)); + } } }