From 5702f436aa6258119a32cbff31cc442d73b0d2c0 Mon Sep 17 00:00:00 2001
From: Djzin
Date: Sun, 12 Mar 2017 18:32:20 +0000
Subject: [PATCH] a new approach; ditch xor cuteness and maximize cache
 locality

---
 src/libcore/mem.rs | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/src/libcore/mem.rs b/src/libcore/mem.rs
index 748d63362463..e1b9991ccfdf 100644
--- a/src/libcore/mem.rs
+++ b/src/libcore/mem.rs
@@ -447,17 +447,29 @@ pub unsafe fn uninitialized<T>() -> T {
 #[stable(feature = "rust1", since = "1.0.0")]
 pub fn swap<T>(x: &mut T, y: &mut T) {
     unsafe {
+        // Give ourselves some scratch space to work with
+        let mut t: [u8; 16] = mem::uninitialized();
+
         let x = x as *mut T as *mut u8;
         let y = y as *mut T as *mut u8;
+        let t = &mut t as *mut _ as *mut u8;
 
         // can't use a for loop as the `range` impl calls `mem::swap` recursively
+        let len = size_of::<T>() as isize;
         let mut i = 0;
-        while i < size_of::<T>() as isize {
-            // use an xor-swap as x & y are guaranteed to never alias
-            *x.offset(i) ^= *y.offset(i);
-            *y.offset(i) ^= *x.offset(i);
-            *x.offset(i) ^= *y.offset(i);
-            i += 1;
+        while i + 16 <= len {
+            // Perform the swap 16 bytes at a time, `&mut` pointers never alias
+            ptr::copy_nonoverlapping(x.offset(i), t, 16);
+            ptr::copy_nonoverlapping(y.offset(i), x.offset(i), 16);
+            ptr::copy_nonoverlapping(t, y.offset(i), 16);
+            i += 16;
+        }
+        if i < len {
+            // Swap any remaining bytes
+            let rem = (len - i) as usize;
+            ptr::copy_nonoverlapping(x.offset(i), t, rem);
+            ptr::copy_nonoverlapping(y.offset(i), x.offset(i), rem);
+            ptr::copy_nonoverlapping(t, y.offset(i), rem);
         }
     }
 }