Rollup merge of #52051 - scottmcm:swap-directly, r=alexcrichton
mem::swap the obvious way for types smaller than the SIMD optimization's block size
LLVM isn't able to remove the alloca for the unaligned block in the post-SIMD tail in some cases, so doing this helps SRoA work in cases where it currently doesn't. Found in the `replace_with` RFC discussion.
Examples of the improvements:
<details>
<summary>swapping `[u16; 3]` takes 1/3 fewer instructions and no stackalloc</summary>
```rust
type Demo = [u16; 3];
pub fn swap_demo(x: &mut Demo, y: &mut Demo) {
std::mem::swap(x, y);
}
```
nightly:
```asm
_ZN4blah9swap_demo17ha1732a9b71393a7eE:
.seh_proc _ZN4blah9swap_demo17ha1732a9b71393a7eE
sub rsp, 32
.seh_stackalloc 32
.seh_endprologue
movzx eax, word ptr [rcx + 4]
mov word ptr [rsp + 4], ax
mov eax, dword ptr [rcx]
mov dword ptr [rsp], eax
movzx eax, word ptr [rdx + 4]
mov word ptr [rcx + 4], ax
mov eax, dword ptr [rdx]
mov dword ptr [rcx], eax
movzx eax, word ptr [rsp + 4]
mov word ptr [rdx + 4], ax
mov eax, dword ptr [rsp]
mov dword ptr [rdx], eax
add rsp, 32
ret
.seh_handlerdata
.section .text,"xr",one_only,_ZN4blah9swap_demo17ha1732a9b71393a7eE
.seh_endproc
```
this PR:
```asm
_ZN4blah9swap_demo17ha1732a9b71393a7eE:
mov r8d, dword ptr [rcx]
movzx r9d, word ptr [rcx + 4]
movzx eax, word ptr [rdx + 4]
mov word ptr [rcx + 4], ax
mov eax, dword ptr [rdx]
mov dword ptr [rcx], eax
mov word ptr [rdx + 4], r9w
mov dword ptr [rdx], r8d
ret
```
</details>
<details>
<summary>`replace_with` optimizes down much better</summary>
Inspired by https://github.com/rust-lang/rfcs/pull/2490,
```rust
fn replace_with<T, F>(x: &mut Option<T>, f: F)
where F: FnOnce(Option<T>) -> Option<T>
{
*x = f(x.take());
}
pub fn inc_opt(mut x: &mut Option<i32>) {
replace_with(&mut x, |i| i.map(|j| j + 1));
}
```
Rust 1.26.0:
```asm
_ZN4blah7inc_opt17heb0acb64c51777cfE:
mov rax, qword ptr [rcx]
movabs r8, 4294967296
add r8, rax
shl rax, 32
movabs rdx, -4294967296
and rdx, r8
xor r8d, r8d
test rax, rax
cmove rdx, rax
setne r8b
or rdx, r8
mov qword ptr [rcx], rdx
ret
```
Nightly (better thanks to ScalarPair, maybe?):
```asm
_ZN4blah7inc_opt17h66df690be0b5899dE:
mov r8, qword ptr [rcx]
mov rdx, r8
shr rdx, 32
xor eax, eax
test r8d, r8d
setne al
add edx, 1
mov dword ptr [rcx], eax
mov dword ptr [rcx + 4], edx
ret
```
This PR:
```asm
_ZN4blah7inc_opt17h1426dc215ecbdb19E:
xor eax, eax
cmp dword ptr [rcx], 0
setne al
mov dword ptr [rcx], eax
add dword ptr [rcx + 4], 1
ret
```
Where that add is beautiful -- using an addressing mode to not even need to explicitly go through a register -- and the remaining imperfection is well-known (https://github.com/rust-lang/rust/pull/49420#issuecomment-376805721).
</details>
This commit is contained in:
commit
b954d4d1b5
3 changed files with 41 additions and 1 deletions
|
|
@ -638,7 +638,7 @@ pub unsafe fn uninitialized<T>() -> T {
|
|||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub fn swap<T>(x: &mut T, y: &mut T) {
|
||||
unsafe {
|
||||
ptr::swap_nonoverlapping(x, y, 1);
|
||||
ptr::swap_nonoverlapping_one(x, y);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -187,6 +187,19 @@ pub unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
|
|||
swap_nonoverlapping_bytes(x, y, len)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) unsafe fn swap_nonoverlapping_one<T>(x: *mut T, y: *mut T) {
|
||||
// For types smaller than the block optimization below,
|
||||
// just swap directly to avoid pessimizing codegen.
|
||||
if mem::size_of::<T>() < 32 {
|
||||
let z = read(x);
|
||||
copy_nonoverlapping(y, x, 1);
|
||||
write(y, z);
|
||||
} else {
|
||||
swap_nonoverlapping(x, y, 1);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, len: usize) {
|
||||
// The approach here is to utilize simd to swap x & y efficiently. Testing reveals
|
||||
|
|
|
|||
27
src/test/codegen/swap-small-types.rs
Normal file
27
src/test/codegen/swap-small-types.rs
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
// compile-flags: -O
|
||||
// only-x86_64
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
||||
use std::mem::swap;
|
||||
|
||||
type RGB48 = [u16; 3];
|
||||
|
||||
// CHECK-LABEL: @swap_rgb48
|
||||
#[no_mangle]
|
||||
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load i48
|
||||
// CHECK: store i48
|
||||
swap(x, y)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue