Slightly optimize main (32b) memcmp loop

At first sight it only seems to save a single instruction, yet the effect is significant.
2022-05-28 22:46:16 +02:00 · 2022-05-28 22:46:16 +02:00 · b94e93ead8
commit b94e93ead8
parent 95d2cd5502
1 changed file with 2 additions and 1 deletion
--- a/library/compiler-builtins/src/mem/x86_64.rs
+++ b/library/compiler-builtins/src/mem/x86_64.rs
@@ -116,7 +116,8 @@ pub unsafe fn compare_bytes(a: *const u8, b: *const u8, n: usize) -> i32 {

        // This should be equivalent to division with power-of-two sizes, except the former
        // somehow still leaves a call to panic because ??
-        for _ in 0..n >> mem::size_of::<T>().trailing_zeros() {
+        let end = a.add(n >> mem::size_of::<T>().trailing_zeros());
+        while a != end {
            if a.read_unaligned() != b.read_unaligned() {
                return f(a.cast(), b.cast(), mem::size_of::<T>());
            }