Slightly optimize main (32b) memcmp loop

At first sight this seems to save only a single instruction, yet the
effects are significant.
This commit is contained in:
David Hoppenbrouwers 2022-05-28 22:46:16 +02:00
parent 95d2cd5502
commit b94e93ead8
No known key found for this signature in database
GPG key ID: A9156EA5E4B644FF

View file

@@ -116,7 +116,8 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {
// This should be equivalent to division with power-of-two sizes, except the former
// somehow still leaves a call to panic because ??
-for _ in 0..n >> mem::size_of::<T>().trailing_zeros() {
+let end = a.add(n >> mem::size_of::<T>().trailing_zeros());
+while a != end {
if a.read_unaligned() != b.read_unaligned() {
return f(a.cast(), b.cast(), mem::size_of::<T>());
}