copy_misaligned_words: avoid out-of-bounds accesses (#799)

* copy_misaligned_words: avoid out-of-bounds accesses
* add test to make Miri able to detect OOB in memmove
* run Miri on CI
This commit is contained in:
Ralf Jung 2025-03-22 06:36:40 +01:00 committed by GitHub
parent 7b674e5a75
commit 5cf993880a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 183 additions and 32 deletions

View file

@ -160,6 +160,21 @@ jobs:
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
miri:
name: Miri
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install Rust (rustup)
run: rustup update nightly --no-self-update && rustup default nightly
shell: bash
- run: rustup component add miri
- run: cargo miri setup
- uses: Swatinem/rust-cache@v2
- run: ./ci/miri.sh
rustfmt:
name: Rustfmt
runs-on: ubuntu-latest
@ -190,6 +205,7 @@ jobs:
- test
- rustfmt
- clippy
- miri
runs-on: ubuntu-latest
# GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
# failed" as success. So we have to do some contortions to ensure the job fails if any of its

View file

@ -0,0 +1,16 @@
#!/bin/bash
# Run the Miri test suite (mem tests only) across a representative target set.
# -e: abort on first failure; -x: echo commands into the CI log.
set -ex

# We need Tree Borrows as some of our raw pointer patterns are not
# compatible with Stacked Borrows.
export MIRIFLAGS="-Zmiri-tree-borrows"

# One target that sets `mem-unaligned` and one that does not,
# and a big-endian target.
TARGETS=(
    x86_64-unknown-linux-gnu
    armv7-unknown-linux-gnueabihf
    s390x-unknown-linux-gnu
)

for TARGET in "${TARGETS[@]}"; do
    # Only run the `mem` tests to avoid this taking too long.
    # "$TARGET" is quoted to prevent word splitting/globbing (ShellCheck SC2086).
    cargo miri test --manifest-path testcrate/Cargo.toml --features no-asm --target "$TARGET" -- mem
done

View file

@ -41,6 +41,72 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
core::mem::transmute(x_read)
}
/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
/// chunk size if a load happened.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_chunk_aligned<T: Copy>(
    src: *const usize,
    dst: *mut usize,
    load_sz: usize,
    offset: usize,
) -> usize {
    let chunk_sz = core::mem::size_of::<T>();
    // For the `u8`/`u16`/`u32` chunk types the callers use, `chunk_sz` is a power of
    // two, so testing the corresponding bit of `load_sz` decides whether this chunk
    // participates in the load at all.
    if (load_sz & chunk_sz) == 0 {
        // This chunk size is not part of `load_sz`'s binary decomposition: nothing to do.
        return offset;
    }
    let value = *src.wrapping_byte_add(offset).cast::<T>();
    *dst.wrapping_byte_add(offset).cast::<T>() = value;
    // Each chunk size is consumed at most once, so OR-ing in the bit is the same as
    // adding `chunk_sz` to the offset.
    offset | chunk_sz
}
/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
/// read with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
    // Each call copies its chunk only when the matching bit of `load_sz` is set, so the three
    // calls together realize `load_sz`'s binary decomposition into 4 + 2 + 1 bytes.
    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
    // All requested bytes must have been consumed exactly.
    debug_assert!(i == load_sz);

    out
}
/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // Obtain pointers pointing to the beginning of the range we want to load.
    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
    // (The shifted pointers start out only byte-aligned, so the `u8` chunk must come first here,
    // the mirror image of `load_aligned_partial`.)
    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
    // All requested bytes must have been consumed exactly.
    debug_assert!(i == load_sz);

    out
}
#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
#[inline(always)]
@ -66,40 +132,57 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
}
}
/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
debug_assert!(n > 0 && n % WORD_SIZE == 0);
debug_assert!(src.addr() % WORD_SIZE != 0);
let mut dest_usize = dest as *mut usize;
let dest_end = dest.wrapping_add(n) as *mut usize;
// Calculate the misalignment offset and shift needed to reassemble value.
// Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;
// Realign src
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
// This will read (but won't use) bytes out of bound.
// cfg needed because not all targets will have atomic loads that can be lowered
// (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
#[cfg(target_has_atomic_load_store = "ptr")]
let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
#[cfg(not(target_has_atomic_load_store = "ptr"))]
let mut prev_word = core::ptr::read_volatile(src_aligned);
let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
while dest_usize < dest_end {
while dest_usize.wrapping_add(1) < dest_end {
src_aligned = src_aligned.wrapping_add(1);
let cur_word = *src_aligned;
#[cfg(target_endian = "little")]
let resembled = prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift);
#[cfg(target_endian = "big")]
let resembled = prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift);
let reassembled = if cfg!(target_endian = "little") {
prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
} else {
prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
};
prev_word = cur_word;
*dest_usize = resembled;
*dest_usize = reassembled;
dest_usize = dest_usize.wrapping_add(1);
}
// There's one more element left to go, and we can't use the loop for that as on the `src` side,
// it is partially out-of-bounds.
src_aligned = src_aligned.wrapping_add(1);
let cur_word = load_aligned_partial(src_aligned, offset);
let reassembled = if cfg!(target_endian = "little") {
prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
} else {
prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
};
// prev_word does not matter any more
*dest_usize = reassembled;
// dest_usize does not matter any more
}
/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
@ -164,40 +247,57 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
}
}
/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
debug_assert!(n > 0 && n % WORD_SIZE == 0);
debug_assert!(src.addr() % WORD_SIZE != 0);
let mut dest_usize = dest as *mut usize;
let dest_start = dest.wrapping_sub(n) as *mut usize;
let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start
// Calculate the misalignment offset and shift needed to reassemble value.
// Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
let offset = src as usize & WORD_MASK;
let shift = offset * 8;
// Realign src_aligned
let mut src_aligned = (src as usize & !WORD_MASK) as *mut usize;
// This will read (but won't use) bytes out of bound.
// cfg needed because not all targets will have atomic loads that can be lowered
// (e.g. BPF, MSP430), or provided by an external library (e.g. RV32I)
#[cfg(target_has_atomic_load_store = "ptr")]
let mut prev_word = core::intrinsics::atomic_load_unordered(src_aligned);
#[cfg(not(target_has_atomic_load_store = "ptr"))]
let mut prev_word = core::ptr::read_volatile(src_aligned);
// Realign src
let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
let mut prev_word = load_aligned_partial(src_aligned, offset);
while dest_start < dest_usize {
while dest_start.wrapping_add(1) < dest_usize {
src_aligned = src_aligned.wrapping_sub(1);
let cur_word = *src_aligned;
#[cfg(target_endian = "little")]
let resembled = prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift;
#[cfg(target_endian = "big")]
let resembled = prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift;
let reassembled = if cfg!(target_endian = "little") {
prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
} else {
prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
};
prev_word = cur_word;
dest_usize = dest_usize.wrapping_sub(1);
*dest_usize = resembled;
*dest_usize = reassembled;
}
// There's one more element left to go, and we can't use the loop for that as on the `src` side,
// it is partially out-of-bounds.
src_aligned = src_aligned.wrapping_sub(1);
let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
let reassembled = if cfg!(target_endian = "little") {
prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
} else {
prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
};
// prev_word does not matter any more
dest_usize = dest_usize.wrapping_sub(1);
*dest_usize = reassembled;
}
/// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
/// `src` *must not* be `usize`-aligned.
#[cfg(feature = "mem-unaligned")]
#[inline(always)]
unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {

View file

@ -128,11 +128,13 @@ fn memcmp_eq() {
#[test]
fn memcmp_ne() {
let arr1 @ arr2 = gen_arr::<256>();
for i in 0..256 {
// Reduce iteration count in Miri as it is too slow otherwise.
let limit = if cfg!(miri) { 64 } else { 256 };
for i in 0..limit {
let mut diff_arr = arr1;
diff_arr.0[i] = 127;
let expect = diff_arr.0[i].cmp(&arr2.0[i]);
for k in i + 1..256 {
for k in i + 1..limit {
let result = unsafe { memcmp(diff_arr.0.as_ptr(), arr2.0.as_ptr(), k) };
assert_eq!(expect, result.cmp(&0));
}
@ -230,6 +232,23 @@ fn memmove_backward_aligned() {
}
}
#[test]
fn memmove_misaligned_bounds() {
    // The above tests have the downside that the addresses surrounding the range-to-copy are all
    // still in-bounds, so Miri would not actually complain about OOB accesses. So we also test with
    // an array that has just the right size. We test a few times to avoid it being accidentally
    // aligned.
    // NOTE(review): 17 appears deliberately chosen as an odd, non-word-multiple length,
    // presumably to exercise the partial-word head/tail paths — confirm against the mem impls.
    for _ in 0..8 {
        let mut arr1 = [0u8; 17];
        let mut arr2 = [0u8; 17];
        unsafe {
            // Copy both ways so we hit both the forward and backward cases.
            memmove(arr1.as_mut_ptr(), arr2.as_mut_ptr(), 17);
            memmove(arr2.as_mut_ptr(), arr1.as_mut_ptr(), 17);
        }
    }
}
#[test]
fn memset_backward_misaligned_nonaligned_start() {
let mut arr = gen_arr::<32>();