Merge pull request #405 from nbdd0121/master

Amanieu d'Antras 2021-08-31 23:21:36 +01:00 committed by GitHub
commit e63fc50bcc
4 changed files with 482 additions and 52 deletions

build.rs

@ -33,6 +33,11 @@ fn main() {
println!("cargo:rustc-cfg=feature=\"mem\"");
}
// These targets have hardware unaligned access support.
if target.contains("x86_64") || target.contains("i686") || target.contains("aarch64") {
println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
}
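// For context: this cfg gates the `mem-unaligned` variants of the word-copy helpers in
// the mem implementation below, which read through unaligned pointers directly instead
// of reassembling each word from two aligned loads.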
// NOTE: we assume that llvm-target, which determines our codegen options, matches the
// target triple. This is usually correct for our built-in targets but can break in the
// presence of custom targets, which can have arbitrary names.

src/mem/impls.rs

@ -1,27 +1,257 @@
#[inline(always)]
-pub unsafe fn copy_forward(dest: *mut u8, src: *const u8, n: usize) {
-let mut i = 0;
-while i < n {
-*dest.add(i) = *src.add(i);
-i += 1;
-}
use core::intrinsics::likely;
const WORD_SIZE: usize = core::mem::size_of::<usize>();
const WORD_MASK: usize = WORD_SIZE - 1;
// If the number of bytes involved exceeds this threshold we opt for a word-wise copy.
// The value selected here is max(2 * WORD_SIZE, 16):
// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied
// through word-wise copy.
// * The word-wise copy logic needs to perform some checks, so it has some small fixed
// overhead; the lower bound of 16 ensures that even on 32-bit platforms we have copied
// at least 8 bytes through word-wise copy, so its savings outweigh the fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
2 * WORD_SIZE
} else {
16
};
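// Illustrative sketch (for exposition): on 32-bit targets (WORD_SIZE = 4) the threshold
// evaluates to max(8, 16) = 16, and on 64-bit targets (WORD_SIZE = 8) to max(16, 16) = 16,
// so copies shorter than 16 bytes always take the byte-wise path on both. A compile-time
// check of both bounds:
const _: () = assert!(WORD_COPY_THRESHOLD >= 2 * WORD_SIZE && WORD_COPY_THRESHOLD >= 16);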
#[cfg(feature = "mem-unaligned")]
unsafe fn read_usize_unaligned(x: *const usize) -> usize {
// Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
// is translated to memcpy in LLVM.
let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
core::mem::transmute(x_read)
}
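// A sketch of the pitfall avoided above (hypothetical code, for exposition only):
//
// unsafe fn read_usize_unaligned_naive(x: *const usize) -> usize {
//     x.read_unaligned() // LLVM lowers this to a memcpy call...
// }
//
// ...which is the very symbol this crate implements, so the naive version risks
// infinite recursion.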
#[inline(always)]
-pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, n: usize) {
-// copy from end
-let mut i = n;
-while i != 0 {
-i -= 1;
-*dest.add(i) = *src.add(i);
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
#[inline(always)]
unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
let dest_end = dest.add(n);
while dest < dest_end {
*dest = *src;
dest = dest.add(1);
src = src.add(1);
}
}
#[inline(always)]
unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let mut src_usize = src as *mut usize;
let dest_end = dest.add(n) as *mut usize;
while dest_usize < dest_end {
*dest_usize = *src_usize;
dest_usize = dest_usize.add(1);
src_usize = src_usize.add(1);
}
}
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let dest_end = dest.add(n) as *mut usize;
// Calculate the misalignment offset and shift needed to reassemble value.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;
// Realign src
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
// This will read (but won't use) bytes out of bounds.
let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
while dest_usize < dest_end {
src_aligned = src_aligned.add(1);
let cur_word = *src_aligned;
#[cfg(target_endian = "little")]
let reassembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
#[cfg(target_endian = "big")]
let reassembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
prev_word = cur_word;
*dest_usize = reassembled;
dest_usize = dest_usize.add(1);
}
}
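// Worked example of the reassembly above as a test sketch (assumes a little-endian,
// 64-bit target; the test name is invented for exposition): reading an 8-byte window
// that starts 3 bytes past an aligned address, so offset = 3 and shift = 24.
#[cfg(all(test, target_endian = "little", target_pointer_width = "64"))]
#[test]
fn misaligned_reassembly_example() {
    use core::convert::TryInto;
    let bytes: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15];
    // Two aligned loads straddling the misaligned window bytes[3..11].
    let prev_word = usize::from_le_bytes(bytes[0..8].try_into().unwrap());
    let cur_word = usize::from_le_bytes(bytes[8..16].try_into().unwrap());
    let shift = 3 * 8;
    let reassembled = prev_word >> shift | cur_word << (64 - shift);
    assert_eq!(reassembled, usize::from_le_bytes(bytes[3..11].try_into().unwrap()));
}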
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let mut src_usize = src as *mut usize;
let dest_end = dest.add(n) as *mut usize;
while dest_usize < dest_end {
*dest_usize = read_usize_unaligned(src_usize);
dest_usize = dest_usize.add(1);
src_usize = src_usize.add(1);
}
}
if n >= WORD_COPY_THRESHOLD {
// Align dest
// Because n >= 2 * WORD_SIZE, dest_misalignment < n
let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
copy_forward_bytes(dest, src, dest_misalignment);
dest = dest.add(dest_misalignment);
src = src.add(dest_misalignment);
n -= dest_misalignment;
let n_words = n & !WORD_MASK;
let src_misalignment = src as usize & WORD_MASK;
if likely(src_misalignment == 0) {
copy_forward_aligned_words(dest, src, n_words);
} else {
copy_forward_misaligned_words(dest, src, n_words);
}
dest = dest.add(n_words);
src = src.add(n_words);
n -= n_words;
}
copy_forward_bytes(dest, src, n);
}
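// Example trace (for exposition): copy_forward(dest, src, 30) with dest % 8 == 5 on a
// 64-bit target decomposes into three phases:
//   copy_forward_bytes(dest, src, 3)    head: realign dest, since (-5) & 7 == 3
//   copy_forward_*_words(dest, src, 24) body: 3 whole words, variant picked by src
//   copy_forward_bytes(dest, src, 3)    tail: the 30 - 3 - 24 = 3 remaining bytes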
#[inline(always)]
-pub unsafe fn set_bytes(s: *mut u8, c: u8, n: usize) {
-let mut i = 0;
-while i < n {
-*s.add(i) = c;
-i += 1;
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
// The following backward copy helper functions use pointers past the end
// as their inputs instead of pointers to the start!
#[inline(always)]
unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
let dest_start = dest.sub(n);
while dest_start < dest {
dest = dest.sub(1);
src = src.sub(1);
*dest = *src;
}
}
#[inline(always)]
unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let mut src_usize = src as *mut usize;
let dest_start = dest.sub(n) as *mut usize;
while dest_start < dest_usize {
dest_usize = dest_usize.sub(1);
src_usize = src_usize.sub(1);
*dest_usize = *src_usize;
}
}
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let dest_start = dest.sub(n) as *mut usize;
// Calculate the misalignment offset and shift needed to reassemble value.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;
// Realign src
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
// This will read (but won't use) bytes out of bounds.
let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
while dest_start < dest_usize {
src_aligned = src_aligned.sub(1);
let cur_word = *src_aligned;
#[cfg(target_endian = "little")]
let reassembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
#[cfg(target_endian = "big")]
let reassembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
prev_word = cur_word;
dest_usize = dest_usize.sub(1);
*dest_usize = reassembled;
}
}
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
let mut dest_usize = dest as *mut usize;
let mut src_usize = src as *mut usize;
let dest_start = dest.sub(n) as *mut usize;
while dest_start < dest_usize {
dest_usize = dest_usize.sub(1);
src_usize = src_usize.sub(1);
*dest_usize = read_usize_unaligned(src_usize);
}
}
let mut dest = dest.add(n);
let mut src = src.add(n);
if n >= WORD_COPY_THRESHOLD {
// Align dest
// Because n >= 2 * WORD_SIZE, dest_misalignment < n
let dest_misalignment = dest as usize & WORD_MASK;
copy_backward_bytes(dest, src, dest_misalignment);
dest = dest.sub(dest_misalignment);
src = src.sub(dest_misalignment);
n -= dest_misalignment;
let n_words = n & !WORD_MASK;
let src_misalignment = src as usize & WORD_MASK;
if likely(src_misalignment == 0) {
copy_backward_aligned_words(dest, src, n_words);
} else {
copy_backward_misaligned_words(dest, src, n_words);
}
dest = dest.sub(n_words);
src = src.sub(n_words);
n -= n_words;
}
copy_backward_bytes(dest, src, n);
}
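// How callers are expected to choose a direction (a sketch for exposition; the actual
// dispatch lives in the memmove entry point outside this file, and `memmove_sketch` is
// an invented name):
#[cfg(any())] // sketch only, never compiled
unsafe fn memmove_sketch(dest: *mut u8, src: *const u8, n: usize) {
    // If dest is not inside [src, src + n), a low-to-high copy never clobbers source
    // bytes it has yet to read; otherwise copy high-to-low.
    if (dest as usize).wrapping_sub(src as usize) >= n {
        copy_forward(dest, src, n)
    } else {
        copy_backward(dest, src, n)
    }
}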
#[inline(always)]
pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
#[inline(always)]
pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
let end = s.add(n);
while s < end {
*s = c;
s = s.add(1);
}
}
#[inline(always)]
pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
let mut broadcast = c as usize;
let mut bits = 8;
while bits < WORD_SIZE * 8 {
broadcast |= broadcast << bits;
bits *= 2;
}
let mut s_usize = s as *mut usize;
let end = s.add(n) as *mut usize;
while s_usize < end {
*s_usize = broadcast;
s_usize = s_usize.add(1);
}
}
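// Worked example (for exposition): with c = 0xAB on a 64-bit target the loop doubles
// the populated width each step, 0xAB -> 0xABAB -> 0xABAB_ABAB -> 0xABAB_ABAB_ABAB_ABAB,
// i.e. the same value as usize::from_ne_bytes([0xAB; WORD_SIZE]).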
if likely(n >= WORD_COPY_THRESHOLD) {
// Align s
// Because n >= 2 * WORD_SIZE, misalignment < n
let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
set_bytes_bytes(s, c, misalignment);
s = s.add(misalignment);
n -= misalignment;
let n_words = n & !WORD_MASK;
set_bytes_words(s, c, n_words);
s = s.add(n_words);
n -= n_words;
}
set_bytes_bytes(s, c, n);
}

testcrate/benches/mem.rs

@ -6,30 +6,64 @@ use test::{black_box, Bencher};
extern crate compiler_builtins;
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
-fn memcpy_builtin(b: &mut Bencher, n: usize, offset: usize) {
-let v1 = vec![1u8; n + offset];
-let mut v2 = vec![0u8; n + offset];
const WORD_SIZE: usize = core::mem::size_of::<usize>();
struct AlignedVec {
vec: Vec<usize>,
size: usize,
}
impl AlignedVec {
fn new(fill: u8, size: usize) -> Self {
let mut broadcast = fill as usize;
let mut bits = 8;
while bits < WORD_SIZE * 8 {
broadcast |= broadcast << bits;
bits *= 2;
}
// Allocate enough usize words to cover `size` bytes, rounding up.
let vec = vec![broadcast; (size + WORD_SIZE - 1) / WORD_SIZE];
AlignedVec { vec, size }
}
}
impl core::ops::Deref for AlignedVec {
type Target = [u8];
fn deref(&self) -> &[u8] {
unsafe { core::slice::from_raw_parts(self.vec.as_ptr() as *const u8, self.size) }
}
}
impl core::ops::DerefMut for AlignedVec {
fn deref_mut(&mut self) -> &mut [u8] {
unsafe { core::slice::from_raw_parts_mut(self.vec.as_mut_ptr() as *mut u8, self.size) }
}
}
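// Usage sketch (for exposition): AlignedVec derefs to a word-aligned &[u8], so slicing
// at a non-zero offset yields a pointer exactly offset % WORD_SIZE bytes past a word
// boundary, which is what the *_offset and *_misalign benchmarks below rely on:
//
// let v = AlignedVec::new(1, 4096 + 65);
// let misaligned: &[u8] = &v[65..]; // 65 % 8 == 1 byte past alignment on 64-bit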
fn memcpy_builtin(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
let v1 = AlignedVec::new(1, n + offset1);
let mut v2 = AlignedVec::new(0, n + offset2);
b.bytes = n as u64;
b.iter(|| {
-let src: &[u8] = black_box(&v1[offset..]);
-let dst: &mut [u8] = black_box(&mut v2[offset..]);
let src: &[u8] = black_box(&v1[offset1..]);
let dst: &mut [u8] = black_box(&mut v2[offset2..]);
dst.copy_from_slice(src);
})
}
-fn memcpy_rust(b: &mut Bencher, n: usize, offset: usize) {
-let v1 = vec![1u8; n + offset];
-let mut v2 = vec![0u8; n + offset];
fn memcpy_rust(b: &mut Bencher, n: usize, offset1: usize, offset2: usize) {
let v1 = AlignedVec::new(1, n + offset1);
let mut v2 = AlignedVec::new(0, n + offset2);
b.bytes = n as u64;
b.iter(|| {
-let src: &[u8] = black_box(&v1[offset..]);
-let dst: &mut [u8] = black_box(&mut v2[offset..]);
let src: &[u8] = black_box(&v1[offset1..]);
let dst: &mut [u8] = black_box(&mut v2[offset2..]);
unsafe { memcpy(dst.as_mut_ptr(), src.as_ptr(), n) }
})
}
fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) {
-let mut v1 = vec![0u8; n + offset];
let mut v1 = AlignedVec::new(0, n + offset);
b.bytes = n as u64;
b.iter(|| {
let dst: &mut [u8] = black_box(&mut v1[offset..]);
@ -41,7 +75,7 @@ fn memset_builtin(b: &mut Bencher, n: usize, offset: usize) {
}
fn memset_rust(b: &mut Bencher, n: usize, offset: usize) {
-let mut v1 = vec![0u8; n + offset];
let mut v1 = AlignedVec::new(0, n + offset);
b.bytes = n as u64;
b.iter(|| {
let dst: &mut [u8] = black_box(&mut v1[offset..]);
@ -51,8 +85,8 @@ fn memset_rust(b: &mut Bencher, n: usize, offset: usize) {
}
fn memcmp_builtin(b: &mut Bencher, n: usize) {
-let v1 = vec![0u8; n];
-let mut v2 = vec![0u8; n];
let v1 = AlignedVec::new(0, n);
let mut v2 = AlignedVec::new(0, n);
v2[n - 1] = 1;
b.bytes = n as u64;
b.iter(|| {
@ -63,8 +97,8 @@ fn memcmp_builtin(b: &mut Bencher, n: usize) {
}
fn memcmp_rust(b: &mut Bencher, n: usize) {
-let v1 = vec![0u8; n];
-let mut v2 = vec![0u8; n];
let v1 = AlignedVec::new(0, n);
let mut v2 = AlignedVec::new(0, n);
v2[n - 1] = 1;
b.bytes = n as u64;
b.iter(|| {
@ -74,20 +108,20 @@ fn memcmp_rust(b: &mut Bencher, n: usize) {
})
}
-fn memmove_builtin(b: &mut Bencher, n: usize) {
-let mut v = vec![0u8; n + n / 2];
fn memmove_builtin(b: &mut Bencher, n: usize, offset: usize) {
let mut v = AlignedVec::new(0, n + n / 2 + offset);
b.bytes = n as u64;
b.iter(|| {
let s: &mut [u8] = black_box(&mut v);
-s.copy_within(0..n, n / 2);
s.copy_within(0..n, n / 2 + offset);
})
}
-fn memmove_rust(b: &mut Bencher, n: usize) {
-let mut v = vec![0u8; n + n / 2];
fn memmove_rust(b: &mut Bencher, n: usize, offset: usize) {
let mut v = AlignedVec::new(0, n + n / 2 + offset);
b.bytes = n as u64;
b.iter(|| {
-let dst: *mut u8 = black_box(&mut v[n / 2..]).as_mut_ptr();
let dst: *mut u8 = black_box(&mut v[n / 2 + offset..]).as_mut_ptr();
let src: *const u8 = black_box(&v).as_ptr();
unsafe { memmove(dst, src, n) };
})
@ -95,35 +129,51 @@ fn memmove_rust(b: &mut Bencher, n: usize) {
#[bench]
fn memcpy_builtin_4096(b: &mut Bencher) {
-memcpy_builtin(b, 4096, 0)
memcpy_builtin(b, 4096, 0, 0)
}
#[bench]
fn memcpy_rust_4096(b: &mut Bencher) {
-memcpy_rust(b, 4096, 0)
memcpy_rust(b, 4096, 0, 0)
}
#[bench]
fn memcpy_builtin_1048576(b: &mut Bencher) {
-memcpy_builtin(b, 1048576, 0)
memcpy_builtin(b, 1048576, 0, 0)
}
#[bench]
fn memcpy_rust_1048576(b: &mut Bencher) {
-memcpy_rust(b, 1048576, 0)
memcpy_rust(b, 1048576, 0, 0)
}
#[bench]
fn memcpy_builtin_4096_offset(b: &mut Bencher) {
-memcpy_builtin(b, 4096, 65)
memcpy_builtin(b, 4096, 65, 65)
}
#[bench]
fn memcpy_rust_4096_offset(b: &mut Bencher) {
-memcpy_rust(b, 4096, 65)
memcpy_rust(b, 4096, 65, 65)
}
#[bench]
fn memcpy_builtin_1048576_offset(b: &mut Bencher) {
-memcpy_builtin(b, 1048576, 65)
memcpy_builtin(b, 1048576, 65, 65)
}
#[bench]
fn memcpy_rust_1048576_offset(b: &mut Bencher) {
-memcpy_rust(b, 1048576, 65)
memcpy_rust(b, 1048576, 65, 65)
}
#[bench]
fn memcpy_builtin_4096_misalign(b: &mut Bencher) {
memcpy_builtin(b, 4096, 65, 66)
}
#[bench]
fn memcpy_rust_4096_misalign(b: &mut Bencher) {
memcpy_rust(b, 4096, 65, 66)
}
#[bench]
fn memcpy_builtin_1048576_misalign(b: &mut Bencher) {
memcpy_builtin(b, 1048576, 65, 66)
}
#[bench]
fn memcpy_rust_1048576_misalign(b: &mut Bencher) {
memcpy_rust(b, 1048576, 65, 66)
}
#[bench]
@ -178,17 +228,33 @@ fn memcmp_rust_1048576(b: &mut Bencher) {
#[bench]
fn memmove_builtin_4096(b: &mut Bencher) {
-memmove_builtin(b, 4096)
memmove_builtin(b, 4096, 0)
}
#[bench]
fn memmove_rust_4096(b: &mut Bencher) {
-memmove_rust(b, 4096)
memmove_rust(b, 4096, 0)
}
#[bench]
fn memmove_builtin_1048576(b: &mut Bencher) {
-memmove_builtin(b, 1048576)
memmove_builtin(b, 1048576, 0)
}
#[bench]
fn memmove_rust_1048576(b: &mut Bencher) {
-memmove_rust(b, 1048576)
memmove_rust(b, 1048576, 0)
}
#[bench]
fn memmove_builtin_4096_misalign(b: &mut Bencher) {
memmove_builtin(b, 4096, 1)
}
#[bench]
fn memmove_rust_4096_misalign(b: &mut Bencher) {
memmove_rust(b, 4096, 1)
}
#[bench]
fn memmove_builtin_1048576_misalign(b: &mut Bencher) {
memmove_builtin(b, 1048576, 1)
}
#[bench]
fn memmove_rust_1048576_misalign(b: &mut Bencher) {
memmove_rust(b, 1048576, 1)
}

testcrate/tests/mem.rs

@ -1,6 +1,8 @@
extern crate compiler_builtins;
use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
const WORD_SIZE: usize = core::mem::size_of::<usize>();
#[test]
fn memcpy_3() {
let mut arr: [u8; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
@ -131,3 +133,130 @@ fn memcmp_ne() {
assert!(memcmp(arr2.as_ptr(), arr1.as_ptr(), 8) > 0);
}
}
#[derive(Clone, Copy)]
struct AlignedStorage<const N: usize>([u8; N], [usize; 0]);
fn gen_arr<const N: usize>() -> AlignedStorage<N> {
let mut ret = AlignedStorage::<N>([0; N], []);
for i in 0..N {
ret.0[i] = i as u8;
}
ret
}
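// Why the zero-sized second field (for exposition): [usize; 0] raises the struct's
// alignment to at least align_of::<usize>() without adding any bytes, so arr.0 always
// starts on a word boundary and the offsets below are measured from that boundary.
// A sketch of the layout guarantees (invented test name):
#[test]
fn aligned_storage_layout_example() {
    assert!(core::mem::align_of::<AlignedStorage<32>>() >= core::mem::align_of::<usize>());
    assert_eq!(core::mem::size_of::<AlignedStorage<32>>() % core::mem::align_of::<usize>(), 0);
}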
#[test]
fn memmove_forward_misaligned_nonaligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().offset(6);
let dst = arr.0.as_mut_ptr().offset(3);
assert_eq!(memmove(dst, src, 17), dst);
reference.0.copy_within(6..6 + 17, 3);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memmove_forward_misaligned_aligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().offset(6);
let dst = arr.0.as_mut_ptr().add(0);
assert_eq!(memmove(dst, src, 17), dst);
reference.0.copy_within(6..6 + 17, 0);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memmove_forward_aligned() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().add(3 + WORD_SIZE);
let dst = arr.0.as_mut_ptr().add(3);
assert_eq!(memmove(dst, src, 17), dst);
reference
.0
.copy_within(3 + WORD_SIZE..3 + WORD_SIZE + 17, 3);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memmove_backward_misaligned_nonaligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().offset(3);
let dst = arr.0.as_mut_ptr().offset(6);
assert_eq!(memmove(dst, src, 17), dst);
reference.0.copy_within(3..3 + 17, 6);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memmove_backward_misaligned_aligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().offset(3);
let dst = arr.0.as_mut_ptr().add(WORD_SIZE);
assert_eq!(memmove(dst, src, 17), dst);
reference.0.copy_within(3..3 + 17, WORD_SIZE);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memmove_backward_aligned() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let src = arr.0.as_ptr().add(3);
let dst = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
assert_eq!(memmove(dst, src, 17), dst);
reference.0.copy_within(3..3 + 17, 3 + WORD_SIZE);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memset_backward_misaligned_nonaligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let ptr = arr.0.as_mut_ptr().offset(6);
assert_eq!(memset(ptr, 0xCC, 17), ptr);
core::ptr::write_bytes(reference.0.as_mut_ptr().add(6), 0xCC, 17);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memset_backward_misaligned_aligned_start() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let ptr = arr.0.as_mut_ptr().add(WORD_SIZE);
assert_eq!(memset(ptr, 0xCC, 17), ptr);
core::ptr::write_bytes(reference.0.as_mut_ptr().add(WORD_SIZE), 0xCC, 17);
assert_eq!(arr.0, reference.0);
}
}
#[test]
fn memset_backward_aligned() {
let mut arr = gen_arr::<32>();
let mut reference = arr;
unsafe {
let ptr = arr.0.as_mut_ptr().add(3 + WORD_SIZE);
assert_eq!(memset(ptr, 0xCC, 17), ptr);
core::ptr::write_bytes(reference.0.as_mut_ptr().add(3 + WORD_SIZE), 0xCC, 17);
assert_eq!(arr.0, reference.0);
}
}
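// A note on the length 17 used throughout (for exposition): it exceeds the 16-byte
// WORD_COPY_THRESHOLD, so every memmove and memset test above reaches the word-wise
// path, while the varying source and destination offsets exercise the head, body, and
// tail phases in their aligned and misaligned combinations.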