auto merge of #6724 : thestinger/rust/swap_fast, r=thestinger
Passing higher alignment values gives the optimization passes more freedom, since they can then copy in larger chunks. With this change, rustc outputs the same post-optimization IR as clang for swaps and most copies, apart from the lack of information about padding.
Code snippet:
```rust
#[inline(never)]
fn swap<T>(x: &mut T, y: &mut T) {
util::swap(x, y);
}
```
Original IR (for `int`):
```llvm
define internal fastcc void @_ZN9swap_283417_a71830ca3ed2d65d3_00E(i64*, i64*) #1 {
static_allocas:
%2 = icmp eq i64* %0, %1
br i1 %2, label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit, label %3
; <label>:3 ; preds = %static_allocas
%4 = load i64* %0, align 1
%5 = load i64* %1, align 1
store i64 %5, i64* %0, align 1
store i64 %4, i64* %1, align 1
br label %_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit
_ZN4util9swap_283717_a71830ca3ed2d65d3_00E.exit: ; preds = %3, %static_allocas
ret void
}
```
After #6710:
```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
%2 = load i64* %0, align 1
%3 = load i64* %1, align 1
store i64 %3, i64* %0, align 1
store i64 %2, i64* %1, align 1
ret void
}
```
After this change:
```llvm
define internal fastcc void @_ZN9swap_283017_a71830ca3ed2d65d3_00E(i64* nocapture, i64* nocapture) #1 {
static_allocas:
%2 = load i64* %0, align 8
%3 = load i64* %1, align 8
store i64 %3, i64* %0, align 8
store i64 %2, i64* %1, align 8
ret void
}
```
Another example:
```rust
#[inline(never)]
fn set<T>(x: &mut T, y: T) {
*x = y;
}
```
Before, with `(int, int)` (align 1):
```llvm
define internal fastcc void @_ZN8set_282517_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
%2 = bitcast { i64, i64 }* %1 to i8*
%3 = bitcast { i64, i64 }* %0 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 1, i1 false)
ret void
}
```
After, with `(int, int)` (align 8):
```llvm
define internal fastcc void @_ZN8set_282617_8fa972e3f9e451983_00E({ i64, i64 }* nocapture, { i64, i64 }* nocapture) #1 {
static_allocas:
%2 = bitcast { i64, i64 }* %1 to i8*
%3 = bitcast { i64, i64 }* %0 to i8*
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 8, i1 false)
ret void
}
```
This commit is contained in:
commit
dbc57584bd
9 changed files with 232 additions and 81 deletions
|
|
@ -52,7 +52,7 @@ use middle::trans::foreign;
|
|||
use middle::trans::glue;
|
||||
use middle::trans::inline;
|
||||
use middle::trans::machine;
|
||||
use middle::trans::machine::llsize_of;
|
||||
use middle::trans::machine::{llalign_of_min, llsize_of};
|
||||
use middle::trans::meth;
|
||||
use middle::trans::monomorphize;
|
||||
use middle::trans::reachable;
|
||||
|
|
@ -1442,12 +1442,7 @@ pub fn with_cond(bcx: block, val: ValueRef, f: &fn(block) -> block) -> block {
|
|||
next_cx
|
||||
}
|
||||
|
||||
pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
|
||||
n_bytes: ValueRef) {
|
||||
// FIXME (Related to #1645, I think?): Provide LLVM with better
|
||||
// alignment information when the alignment is statically known (it must
|
||||
// be nothing more than a constant int, or LLVM complains -- not even a
|
||||
// constant element of a tydesc works).
|
||||
pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef, n_bytes: ValueRef, align: u32) {
|
||||
let _icx = cx.insn_ctxt("call_memcpy");
|
||||
let ccx = cx.ccx();
|
||||
let key = match ccx.sess.targ_cfg.arch {
|
||||
|
|
@ -1462,7 +1457,7 @@ pub fn call_memcpy(cx: block, dst: ValueRef, src: ValueRef,
|
|||
let src_ptr = PointerCast(cx, src, T_ptr(T_i8()));
|
||||
let dst_ptr = PointerCast(cx, dst, T_ptr(T_i8()));
|
||||
let size = IntCast(cx, n_bytes, ccx.int_type);
|
||||
let align = C_i32(1i32);
|
||||
let align = C_i32(align as i32);
|
||||
let volatile = C_i1(false);
|
||||
Call(cx, memcpy, [dst_ptr, src_ptr, size, align, volatile]);
|
||||
}
|
||||
|
|
@ -1471,8 +1466,10 @@ pub fn memcpy_ty(bcx: block, dst: ValueRef, src: ValueRef, t: ty::t) {
|
|||
let _icx = bcx.insn_ctxt("memcpy_ty");
|
||||
let ccx = bcx.ccx();
|
||||
if ty::type_is_structural(t) {
|
||||
let llsz = llsize_of(ccx, type_of::type_of(ccx, t));
|
||||
call_memcpy(bcx, dst, src, llsz);
|
||||
let llty = type_of::type_of(ccx, t);
|
||||
let llsz = llsize_of(ccx, llty);
|
||||
let llalign = llalign_of_min(ccx, llty);
|
||||
call_memcpy(bcx, dst, src, llsz, llalign as u32);
|
||||
} else {
|
||||
Store(bcx, Load(bcx, src), dst);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -521,7 +521,7 @@ pub fn make_opaque_cbox_take_glue(
|
|||
[opaque_tydesc, sz],
|
||||
expr::SaveIn(rval));
|
||||
let cbox_out = PointerCast(bcx, Load(bcx, rval), llopaquecboxty);
|
||||
call_memcpy(bcx, cbox_out, cbox_in, sz);
|
||||
call_memcpy(bcx, cbox_out, cbox_in, sz, 1);
|
||||
Store(bcx, cbox_out, cboxptr);
|
||||
|
||||
// Take the (deeply cloned) type descriptor
|
||||
|
|
|
|||
|
|
@ -787,7 +787,7 @@ pub fn trans_intrinsic(ccx: @CrateContext,
|
|||
let llsrcptr = PointerCast(bcx, llsrcptr, T_ptr(T_i8()));
|
||||
|
||||
let llsize = llsize_of(ccx, llintype);
|
||||
call_memcpy(bcx, lldestptr, llsrcptr, llsize);
|
||||
call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
|
||||
}
|
||||
}
|
||||
~"needs_drop" => {
|
||||
|
|
@ -846,44 +846,82 @@ pub fn trans_intrinsic(ccx: @CrateContext,
|
|||
Store(bcx, morestack_addr, fcx.llretptr.get());
|
||||
}
|
||||
~"memcpy32" => {
|
||||
let dst_ptr = get_param(decl, first_real_arg);
|
||||
let src_ptr = get_param(decl, first_real_arg + 1);
|
||||
let size = get_param(decl, first_real_arg + 2);
|
||||
let align = C_i32(1);
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(
|
||||
&~"llvm.memcpy.p0i8.p0i8.i32");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i32");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"memcpy64" => {
|
||||
let dst_ptr = get_param(decl, first_real_arg);
|
||||
let src_ptr = get_param(decl, first_real_arg + 1);
|
||||
let size = get_param(decl, first_real_arg + 2);
|
||||
let align = C_i32(1);
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(
|
||||
&~"llvm.memcpy.p0i8.p0i8.i64");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memcpy.p0i8.p0i8.i64");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"memmove32" => {
|
||||
let dst_ptr = get_param(decl, first_real_arg);
|
||||
let src_ptr = get_param(decl, first_real_arg + 1);
|
||||
let size = get_param(decl, first_real_arg + 2);
|
||||
let align = C_i32(1);
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(
|
||||
&~"llvm.memmove.p0i8.p0i8.i32");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i32");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"memmove64" => {
|
||||
let dst_ptr = get_param(decl, first_real_arg);
|
||||
let src_ptr = get_param(decl, first_real_arg + 1);
|
||||
let size = get_param(decl, first_real_arg + 2);
|
||||
let align = C_i32(1);
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let src_ptr = PointerCast(bcx, get_param(decl, first_real_arg + 1), T_ptr(T_i8()));
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(
|
||||
&~"llvm.memmove.p0i8.p0i8.i64");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, size, align, volatile]);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memmove.p0i8.p0i8.i64");
|
||||
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"memset32" => {
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i32(machine::llsize_of_real(ccx, lltp_ty) as i32);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let val = get_param(decl, first_real_arg + 1);
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i32");
|
||||
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"memset64" => {
|
||||
let tp_ty = substs.tys[0];
|
||||
let lltp_ty = type_of::type_of(ccx, tp_ty);
|
||||
let align = C_i32(machine::llalign_of_min(ccx, lltp_ty) as i32);
|
||||
let size = C_i64(machine::llsize_of_real(ccx, lltp_ty) as i64);
|
||||
|
||||
let dst_ptr = PointerCast(bcx, get_param(decl, first_real_arg), T_ptr(T_i8()));
|
||||
let val = get_param(decl, first_real_arg + 1);
|
||||
let count = get_param(decl, first_real_arg + 2);
|
||||
let volatile = C_i1(false);
|
||||
let llfn = *bcx.ccx().intrinsics.get(&~"llvm.memset.p0i8.i64");
|
||||
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
|
||||
}
|
||||
~"sqrtf32" => {
|
||||
let x = get_param(decl, first_real_arg);
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ pub fn duplicate_uniq(bcx: block, vptr: ValueRef, vec_ty: ty::t) -> Result {
|
|||
|
||||
let data_ptr = get_dataptr(bcx, get_bodyptr(bcx, vptr));
|
||||
let new_data_ptr = get_dataptr(bcx, get_bodyptr(bcx, newptr));
|
||||
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill);
|
||||
base::call_memcpy(bcx, new_data_ptr, data_ptr, fill, 1);
|
||||
|
||||
let bcx = if ty::type_needs_drop(bcx.tcx(), unit_ty) {
|
||||
iter_vec_raw(bcx, new_data_ptr, vec_ty, fill, glue::take_ty)
|
||||
|
|
@ -370,7 +370,7 @@ pub fn write_content(bcx: block,
|
|||
let bytes = s.len() + 1; // copy null-terminator too
|
||||
let llbytes = C_uint(bcx.ccx(), bytes);
|
||||
let llcstr = C_cstr(bcx.ccx(), s);
|
||||
base::call_memcpy(bcx, lldest, llcstr, llbytes);
|
||||
base::call_memcpy(bcx, lldest, llcstr, llbytes, 1);
|
||||
return bcx;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -135,7 +135,8 @@ pub fn type_uses_for(ccx: @CrateContext, fn_id: def_id, n_tps: uint)
|
|||
~"visit_tydesc" | ~"forget" | ~"frame_address" |
|
||||
~"morestack_addr" => 0,
|
||||
|
||||
~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" => 0,
|
||||
~"memcpy32" | ~"memcpy64" | ~"memmove32" | ~"memmove64" |
|
||||
~"memset32" | ~"memset64" => use_repr,
|
||||
|
||||
~"sqrtf32" | ~"sqrtf64" | ~"powif32" | ~"powif64" |
|
||||
~"sinf32" | ~"sinf64" | ~"cosf32" | ~"cosf64" |
|
||||
|
|
|
|||
|
|
@ -3538,14 +3538,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
|
|||
(0u, ~[], ty::mk_nil_ptr(ccx.tcx))
|
||||
}
|
||||
~"memcpy32" => {
|
||||
(0,
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_imm
|
||||
}),
|
||||
ty::mk_u32()
|
||||
|
|
@ -3553,14 +3553,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
|
|||
ty::mk_nil())
|
||||
}
|
||||
~"memcpy64" => {
|
||||
(0,
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_imm
|
||||
}),
|
||||
ty::mk_u64()
|
||||
|
|
@ -3568,14 +3568,14 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
|
|||
ty::mk_nil())
|
||||
}
|
||||
~"memmove32" => {
|
||||
(0,
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_imm
|
||||
}),
|
||||
ty::mk_u32()
|
||||
|
|
@ -3583,20 +3583,44 @@ pub fn check_intrinsic_type(ccx: @mut CrateCtxt, it: @ast::foreign_item) {
|
|||
ty::mk_nil())
|
||||
}
|
||||
~"memmove64" => {
|
||||
(0,
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: ty::mk_u8(),
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_imm
|
||||
}),
|
||||
ty::mk_u64()
|
||||
],
|
||||
ty::mk_nil())
|
||||
}
|
||||
~"memset32" => {
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_u8(),
|
||||
ty::mk_u32()
|
||||
],
|
||||
ty::mk_nil())
|
||||
}
|
||||
~"memset64" => {
|
||||
(1,
|
||||
~[
|
||||
ty::mk_ptr(tcx, ty::mt {
|
||||
ty: param(ccx, 0),
|
||||
mutbl: ast::m_mutbl
|
||||
}),
|
||||
ty::mk_u8(),
|
||||
ty::mk_u64()
|
||||
],
|
||||
ty::mk_nil())
|
||||
}
|
||||
~"sqrtf32" => (0, ~[ ty::mk_f32() ], ty::mk_f32()),
|
||||
~"sqrtf64" => (0, ~[ ty::mk_f64() ], ty::mk_f64()),
|
||||
~"powif32" => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue