Translate SIMD construction as insertelements and a single store.

This almost completely avoids GEPi's and pointer manipulation,
postponing all memory access until the end, where the whole vector is
written with one big store. This leads to a small speed-up in compilation
and makes it easier for LLVM to work with the values. For example, with `--opt-level=0`,

    pub fn foo() -> f32x4 {
        f32x4(0.,0.,0.,0.)
    }

was previously compiled to

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      %sret_slot = alloca <4 x float>
      %0 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 0
      store float 0.000000e+00, float* %0
      %1 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 1
      store float 0.000000e+00, float* %1
      %2 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 2
      store float 0.000000e+00, float* %2
      %3 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 3
      store float 0.000000e+00, float* %3
      %4 = load <4 x float>* %sret_slot
      ret <4 x float> %4
    }

but now becomes

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      ret <4 x float> zeroinitializer
    }
This commit is contained in:
Huon Wilson 2014-11-04 20:59:00 +11:00
parent ff50f24feb
commit 071c411045

View file

@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
None => {}
};
// Now, we just overwrite the fields we've explicitly specified
for &(i, ref e) in fields.iter() {
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
let e_ty = expr_ty_adjusted(bcx, &**e);
bcx = trans_into(bcx, &**e, SaveIn(dest));
let scope = cleanup::CustomScope(custom_cleanup_scope);
fcx.schedule_lifetime_end(scope, dest);
fcx.schedule_drop_mem(scope, dest, e_ty);
if ty::type_is_simd(bcx.tcx(), ty) {
// This is the constructor of a SIMD type. Such types are
// always primitive machine types, and so do not have a
// destructor or require any clean-up.
let llty = type_of::type_of(bcx.ccx(), ty);
// Keep the vector in a register, running through the fields and
// `insertelement`ing them directly into that register
// (i.e. avoid GEPi and `store`s to an alloca).
let mut vec_val = C_undef(llty);
for &(i, ref e) in fields.iter() {
let block_datum = trans(bcx, &**e);
bcx = block_datum.bcx;
let position = C_uint(bcx.ccx(), i);
let value = block_datum.datum.to_llscalarish(bcx);
vec_val = InsertElement(bcx, vec_val, value, position);
}
Store(bcx, vec_val, addr);
} else {
// Now, we just overwrite the fields we've explicitly specified
for &(i, ref e) in fields.iter() {
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
let e_ty = expr_ty_adjusted(bcx, &**e);
bcx = trans_into(bcx, &**e, SaveIn(dest));
let scope = cleanup::CustomScope(custom_cleanup_scope);
fcx.schedule_lifetime_end(scope, dest);
fcx.schedule_drop_mem(scope, dest, e_ty);
}
}
adt::trans_set_discr(bcx, &*repr, addr, discr);