auto merge of #6724 : thestinger/rust/swap_fast, r=thestinger
Passing higher alignment values gives the optimization passes more freedom, since they can copy in larger chunks. With this change, rustc outputs the same post-optimization IR as clang for swaps and most copies, apart from the lack of information about padding.
Code snippet:
```rust
#[inline(never)]
fn swap<T>(x: &mut T, y: &mut T) {
util::swap(x, y);
}
```
Original IR (for `int`):
```llvm
define internal fastcc void @_ZN9swap_283417_a71830ca3ed2d65d3_00E(i64*, i64*) #1 {
static_allocas:
%2 = icmp eq i64* %0, %1
br i1 %2, label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit, label %3
; <label>:3 ; preds = %static_allocas
%4 = load i64* %0, align 1
%5 = load i64* %1, align 1
store i64 %5, i64* %0, align 1
store i64 %4, i64* %1, align 1
br label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit
_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit: ; preds = %3, %static_allocas
ret void
}
```
After #6710:
```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
%2 = load i64* %0, align 1
%3 = load i64* %1, align 1
store i64 %3, i64* %0, align 1
store i64 %2, i64* %1, align 1
ret void
}
```
After this change:
```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
%2 = load i64* %0, align 8
%3 = load i64* %1, align 8
store i64 %3, i64* %0, align 8
store i64 %2, i64* %1, align 8
ret void
}
```
Another example:
```rust
#[inline(never)]
fn set<T>(x: &mut T, y: T) {
*x = y;
}
```
Before, with `(int, int)` (align 1):
```llvm
define internal fastcc void @_ZN8set_282517_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
%2 = bitcast { i64, i64 }* %1 to i8*
%3 = bitcast { i64, i64 }* %0 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 1, i1 false)
ret void
}
```
After, with `(int, int)` (align 8):
```llvm
define internal fastcc void @_ZN8set_282617_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
%2 = bitcast { i64, i64 }* %1 to i8*
%3 = bitcast { i64, i64 }* %0 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
ret void
}
```
This commit is contained in:
commit
dbc57584bd
9 changed files with 232 additions and 81 deletions
|
|
@ -14,8 +14,9 @@ use sys;
|
|||
use unstable::intrinsics;
|
||||
|
||||
/// Casts the value at `src` to U. The two types must have the same length.
|
||||
#[cfg(stage0)]
|
||||
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
|
||||
let mut dest: U = intrinsics::init();
|
||||
let mut dest: U = intrinsics::uninit();
|
||||
{
|
||||
let dest_ptr: *mut u8 = transmute(&mut dest);
|
||||
let src_ptr: *u8 = transmute(src);
|
||||
|
|
@ -26,6 +27,26 @@ pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
|
|||
dest
|
||||
}
|
||||
|
||||
#[cfg(target_word_size = "32", not(stage0))]
|
||||
#[inline(always)]
|
||||
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
|
||||
let mut dest: U = intrinsics::uninit();
|
||||
let dest_ptr: *mut u8 = transmute(&mut dest);
|
||||
let src_ptr: *u8 = transmute(src);
|
||||
intrinsics::memcpy32(dest_ptr, src_ptr, sys::size_of::<U>() as u32);
|
||||
dest
|
||||
}
|
||||
|
||||
#[cfg(target_word_size = "64", not(stage0))]
|
||||
#[inline(always)]
|
||||
pub unsafe fn transmute_copy<T, U>(src: &T) -> U {
|
||||
let mut dest: U = intrinsics::uninit();
|
||||
let dest_ptr: *mut u8 = transmute(&mut dest);
|
||||
let src_ptr: *u8 = transmute(src);
|
||||
intrinsics::memcpy64(dest_ptr, src_ptr, sys::size_of::<U>() as u64);
|
||||
dest
|
||||
}
|
||||
|
||||
/**
|
||||
* Move a thing into the void
|
||||
*
|
||||
|
|
@ -43,6 +64,7 @@ pub unsafe fn forget<T>(thing: T) { intrinsics::forget(thing); }
|
|||
* and/or reinterpret_cast when such calls would otherwise scramble a box's
|
||||
* reference count
|
||||
*/
|
||||
#[inline(always)]
|
||||
pub unsafe fn bump_box_refcount<T>(t: @T) { forget(t); }
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ use sys;
|
|||
#[cfg(not(test))] use cmp::{Eq, Ord};
|
||||
use uint;
|
||||
|
||||
#[cfg(stage0)]
|
||||
pub mod libc_ {
|
||||
use libc::c_void;
|
||||
use libc;
|
||||
|
|
@ -26,12 +27,6 @@ pub mod libc_ {
|
|||
#[nolink]
|
||||
#[abi = "cdecl"]
|
||||
pub extern {
|
||||
#[rust_stack]
|
||||
unsafe fn memmove(dest: *mut c_void,
|
||||
src: *const c_void,
|
||||
n: libc::size_t)
|
||||
-> *c_void;
|
||||
|
||||
#[rust_stack]
|
||||
unsafe fn memset(dest: *mut c_void,
|
||||
c: libc::c_int,
|
||||
|
|
@ -97,15 +92,28 @@ pub fn is_not_null<T>(ptr: *const T) -> bool { !is_null(ptr) }
|
|||
* and destination may overlap.
|
||||
*/
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "32")]
|
||||
#[cfg(target_word_size = "32", stage0)]
|
||||
pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memmove32;
|
||||
let n = count * sys::size_of::<T>();
|
||||
memmove32(dst as *mut u8, src as *u8, n as u32);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copies data from one location to another
|
||||
*
|
||||
* Copies `count` elements (not bytes) from `src` to `dst`. The source
|
||||
* and destination may overlap.
|
||||
*/
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "64")]
|
||||
#[cfg(target_word_size = "32", not(stage0))]
|
||||
pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memmove32;
|
||||
memmove32(dst, src as *T, count as u32);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "64", stage0)]
|
||||
pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memmove64;
|
||||
let n = count * sys::size_of::<T>();
|
||||
|
|
@ -113,33 +121,63 @@ pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
|||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "32")]
|
||||
#[cfg(target_word_size = "64", not(stage0))]
|
||||
pub unsafe fn copy_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memmove64;
|
||||
memmove64(dst, src as *T, count as u64);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "32", stage0)]
|
||||
pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memmove32;
|
||||
let n = count * sys::size_of::<T>();
|
||||
memmove32(dst as *mut u8, src as *u8, n as u32);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "32", not(stage0))]
|
||||
pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
#[cfg(stage0)]
|
||||
use memcpy32 = unstable::intrinsics::memmove32;
|
||||
#[cfg(not(stage0))]
|
||||
use unstable::intrinsics::memcpy32;
|
||||
let n = count * sys::size_of::<T>();
|
||||
memcpy32(dst as *mut u8, src as *u8, n as u32);
|
||||
memcpy32(dst, src as *T, count as u32);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "64")]
|
||||
#[cfg(target_word_size = "64", stage0)]
|
||||
pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
#[cfg(stage0)]
|
||||
use memcpy64 = unstable::intrinsics::memmove64;
|
||||
#[cfg(not(stage0))]
|
||||
use unstable::intrinsics::memcpy64;
|
||||
use unstable::intrinsics::memmove64;
|
||||
let n = count * sys::size_of::<T>();
|
||||
memcpy64(dst as *mut u8, src as *u8, n as u64);
|
||||
memmove64(dst as *mut u8, src as *u8, n as u64);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "64", not(stage0))]
|
||||
pub unsafe fn copy_nonoverlapping_memory<T>(dst: *mut T, src: *const T, count: uint) {
|
||||
use unstable::intrinsics::memcpy64;
|
||||
memcpy64(dst, src as *T, count as u64);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(stage0)]
|
||||
pub unsafe fn set_memory<T>(dst: *mut T, c: int, count: uint) {
|
||||
let n = count * sys::size_of::<T>();
|
||||
libc_::memset(dst as *mut c_void, c as libc::c_int, n as size_t);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "32", not(stage0))]
|
||||
pub unsafe fn set_memory<T>(dst: *mut T, c: u8, count: uint) {
|
||||
use unstable::intrinsics::memset32;
|
||||
memset32(dst, c, count as u32);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
#[cfg(target_word_size = "64", not(stage0))]
|
||||
pub unsafe fn set_memory<T>(dst: *mut T, c: u8, count: uint) {
|
||||
use unstable::intrinsics::memset64;
|
||||
memset64(dst, c, count as u64);
|
||||
}
|
||||
|
||||
/**
|
||||
Transform a region pointer - &T - to an unsafe pointer - *T.
|
||||
This is safe, but is implemented with an unsafe block due to
|
||||
|
|
@ -581,4 +619,12 @@ pub mod ptr_tests {
|
|||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_set_memory() {
|
||||
let mut xs = [0u8, ..20];
|
||||
let ptr = vec::raw::to_mut_ptr(xs);
|
||||
unsafe { set_memory(ptr, 5u8, xs.len()); }
|
||||
assert_eq!(xs, [5u8, ..20]);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ The corresponding definitions are in librustc/middle/trans/foreign.rs.
|
|||
|
||||
The atomic intrinsics provide common atomic operations on machine
|
||||
words, with multiple possible memory orderings. They obey the same
|
||||
semantics as C++0x. See the LLVM documentation on [[atomics]].
|
||||
semantics as C++11. See the LLVM documentation on [[atomics]].
|
||||
|
||||
[atomics]: http://llvm.org/docs/Atomics.html
|
||||
|
||||
|
|
@ -31,6 +31,7 @@ A quick refresher on memory ordering:
|
|||
with atomic types and is equivalent to Java's `volatile`.
|
||||
|
||||
*/
|
||||
|
||||
#[abi = "rust-intrinsic"]
|
||||
pub extern "rust-intrinsic" {
|
||||
|
||||
|
|
@ -127,18 +128,40 @@ pub extern "rust-intrinsic" {
|
|||
/// Get the address of the `__morestack` stack growth function.
|
||||
pub fn morestack_addr() -> *();
|
||||
|
||||
/// Equivalent to the `llvm.memcpy.p0i8.0i8.i32` intrinsic.
|
||||
/// Equivalent to the `llvm.memcpy.p0i8.p0i8.i32` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memcpy32(dst: *mut u8, src: *u8, size: u32);
|
||||
/// Equivalent to the `llvm.memcpy.p0i8.0i8.i64` intrinsic.
|
||||
pub fn memcpy32<T>(dst: *mut T, src: *T, count: u32);
|
||||
/// Equivalent to the `llvm.memcpy.p0i8.p0i8.i64` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memcpy64(dst: *mut u8, src: *u8, size: u64);
|
||||
pub fn memcpy64<T>(dst: *mut T, src: *T, count: u64);
|
||||
|
||||
/// Equivalent to the `llvm.memmove.p0i8.p0i8.i32` intrinsic.
|
||||
#[cfg(stage0)]
|
||||
pub fn memmove32(dst: *mut u8, src: *u8, size: u32);
|
||||
/// Equivalent to the `llvm.memmove.p0i8.p0i8.i64` intrinsic.
|
||||
#[cfg(stage0)]
|
||||
pub fn memmove64(dst: *mut u8, src: *u8, size: u64);
|
||||
|
||||
/// Equivalent to the `llvm.memmove.p0i8.p0i8.i32` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memmove32<T>(dst: *mut T, src: *T, count: u32);
|
||||
/// Equivalent to the `llvm.memmove.p0i8.p0i8.i64` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memmove64<T>(dst: *mut T, src: *T, count: u64);
|
||||
|
||||
/// Equivalent to the `llvm.memset.p0i8.i32` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memset32<T>(dst: *mut T, val: u8, count: u32);
|
||||
/// Equivalent to the `llvm.memset.p0i8.i64` intrinsic, with a size of
|
||||
/// `count` * `size_of::<T>()` and an alignment of `min_align_of::<T>()`
|
||||
#[cfg(not(stage0))]
|
||||
pub fn memset64<T>(dst: *mut T, val: u8, count: u64);
|
||||
|
||||
pub fn sqrtf32(x: f32) -> f32;
|
||||
pub fn sqrtf64(x: f64) -> f64;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue