Auto merge of #28136 - huonw:simd, r=alexcrichton

I believe every intrinsic up to SSE4.2 that doesn't take a constant
integer should now be correct. (I have no reason to believe that the
ones that do take constant integers are wrong; they're just more
complicated and I haven't tested them in detail.)

(This also removes two unused code paths from trans.)
This commit is contained in:
bors 2015-09-01 07:19:12 +00:00
commit 4ad128b95c
6 changed files with 35 additions and 64 deletions

View file

@ -19,8 +19,8 @@
"intrinsic": "_madd_epi16",
"width": [128],
"llvm": "pmadd.wd",
"ret": "s16",
"args": ["0", "0"]
"ret": "s32",
"args": ["s16", "s16"]
},
{
"intrinsic": "_max_{0.data_type}",
@ -68,11 +68,11 @@
"intrinsic": "_mul_epu32",
"width": [128],
"llvm": "pmulu.dq",
"ret": "s64",
"ret": "u64",
"args": ["0dn", "0dn"]
},
{
"intrinsic": "_mulhi_ep{0.kind}16",
"intrinsic": "_mulhi_{0.data_type}",
"width": [128],
"llvm": "pmulh{0.kind_short}.w",
"ret": "i16",

View file

@ -39,7 +39,7 @@
{
"intrinsic": "_mul_epi32",
"width": [128],
"llvm": "muldq",
"llvm": "pmuldq",
"ret": "s64",
"args": ["s32", "s32"]
},
@ -58,9 +58,9 @@
"args": ["u64", "u64"]
},
{
"intrinsic": "_testncz_si128",
"intrinsic": "_testnzc_si128",
"width": [128],
"llvm": "ptest.nzc",
"llvm": "ptestnzc",
"ret": "S32",
"args": ["u64", "u64"]
},

View file

@ -4,7 +4,7 @@
{
"intrinsic": "_abs_{0.data_type}",
"width": [128],
"llvm": "pabs.{0.data_type_short}",
"llvm": "pabs.{0.data_type_short}.128",
"ret": "s(8-32)",
"args": ["0"]
},
@ -41,7 +41,7 @@
"width": [128],
"llvm": "pmadd.ub.sw.128",
"ret": "s16",
"args": ["s8", "s8"]
"args": ["u8", "s8"]
},
{
"intrinsic": "_mulhrs_epi16",
@ -61,7 +61,7 @@
"intrinsic": "_sign_{0.data_type}",
"width": [128],
"llvm": "psign.{0.data_type_short}.128",
"ret": "s(8-16)",
"ret": "s(8-32)",
"args": ["0", "0"]
}
]

View file

@ -82,7 +82,7 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
},
"_madd_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
output: v(i(16), 8),
output: v(i(32), 4),
definition: Named("llvm.x86.sse2.pmadd.wd")
},
"_max_epi16" => Intrinsic {
@ -126,11 +126,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
definition: Named("llvm.x86.sse2.pmovmskb.128")
},
"_mul_epu32" => Intrinsic {
inputs: vec![v(i(32), 4), v(i(32), 4)],
output: v(i(64), 2),
inputs: vec![v(u(32), 4), v(u(32), 4)],
output: v(u(64), 2),
definition: Named("llvm.x86.sse2.pmulu.dq")
},
"_mulhi_eps16" => Intrinsic {
"_mulhi_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
output: v(i(16), 8),
definition: Named("llvm.x86.sse2.pmulh.w")
@ -218,17 +218,17 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
"_abs_epi8" => Intrinsic {
inputs: vec![v(i(8), 16)],
output: v(i(8), 16),
definition: Named("llvm.x86.ssse3.pabs.b")
definition: Named("llvm.x86.ssse3.pabs.b.128")
},
"_abs_epi16" => Intrinsic {
inputs: vec![v(i(16), 8)],
output: v(i(16), 8),
definition: Named("llvm.x86.ssse3.pabs.w")
definition: Named("llvm.x86.ssse3.pabs.w.128")
},
"_abs_epi32" => Intrinsic {
inputs: vec![v(i(32), 4)],
output: v(i(32), 4),
definition: Named("llvm.x86.ssse3.pabs.d")
definition: Named("llvm.x86.ssse3.pabs.d.128")
},
"_hadd_epi16" => Intrinsic {
inputs: vec![v(i(16), 8), v(i(16), 8)],
@ -261,7 +261,7 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
definition: Named("llvm.x86.ssse3.phsub.sw.128")
},
"_maddubs_epi16" => Intrinsic {
inputs: vec![v(i(8), 16), v(i(8), 16)],
inputs: vec![v(u(8), 16), v(i(8), 16)],
output: v(i(16), 8),
definition: Named("llvm.x86.ssse3.pmadd.ub.sw.128")
},
@ -285,6 +285,11 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: v(i(16), 8),
definition: Named("llvm.x86.ssse3.psign.w.128")
},
"_sign_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), v(i(32), 4)],
output: v(i(32), 4),
definition: Named("llvm.x86.ssse3.psign.d.128")
},
"_dp_ps" => Intrinsic {
inputs: vec![v(f(32), 4), v(f(32), 4), i_(32, 8)],
output: v(f(32), 4),
@ -348,7 +353,7 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
"_mul_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), v(i(32), 4)],
output: v(i(64), 2),
definition: Named("llvm.x86.sse41.muldq")
definition: Named("llvm.x86.sse41.pmuldq")
},
"_packus_epi32" => Intrinsic {
inputs: vec![v(i(32), 4), v(i(32), 4)],
@ -360,10 +365,10 @@ pub fn find<'tcx>(_tcx: &ty::ctxt<'tcx>, name: &str) -> Option<Intrinsic> {
output: i(32),
definition: Named("llvm.x86.sse41.ptestc")
},
"_testncz_si128" => Intrinsic {
"_testnzc_si128" => Intrinsic {
inputs: vec![v(u(64), 2), v(u(64), 2)],
output: i(32),
definition: Named("llvm.x86.sse41.ptest.nzc")
definition: Named("llvm.x86.sse41.ptestnzc")
},
"_testz_si128" => Intrinsic {
inputs: vec![v(u(64), 2), v(u(64), 2)],

View file

@ -501,14 +501,9 @@ fn const_expr_unadjusted<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>,
debug!("const_expr_unadjusted: te1={}, ty={:?}",
cx.tn().val_to_string(te1),
ty);
let is_simd = ty.is_simd();
let intype = if is_simd {
ty.simd_type(cx.tcx())
} else {
ty
};
let is_float = intype.is_fp();
let signed = intype.is_signed();
assert!(!ty.is_simd());
let is_float = ty.is_fp();
let signed = ty.is_signed();
let (te2, _) = const_expr(cx, &**e2, param_substs, fn_args);
@ -552,14 +547,7 @@ fn const_expr_unadjusted<'a, 'tcx>(cx: &CrateContext<'a, 'tcx>,
ConstFCmp(cmp, te1, te2)
} else {
let cmp = base::bin_op_to_icmp_predicate(cx, b.node, signed);
let bool_val = ConstICmp(cmp, te1, te2);
if is_simd {
// LLVM outputs an `< size x i1 >`, so we need to perform
// a sign extension to get the correctly sized type.
llvm::LLVMConstIntCast(bool_val, val_ty(te1).to_ref(), True)
} else {
bool_val
}
ConstICmp(cmp, te1, te2)
}
},
} } // unsafe { match b.node {

View file

@ -1693,14 +1693,9 @@ fn trans_eager_binop<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
let _icx = push_ctxt("trans_eager_binop");
let tcx = bcx.tcx();
let is_simd = lhs_t.is_simd();
let intype = if is_simd {
lhs_t.simd_type(tcx)
} else {
lhs_t
};
let is_float = intype.is_fp();
let is_signed = intype.is_signed();
assert!(!lhs_t.is_simd());
let is_float = lhs_t.is_fp();
let is_signed = lhs_t.is_signed();
let info = expr_info(binop_expr);
let binop_debug_loc = binop_expr.debug_loc();
@ -1710,8 +1705,6 @@ fn trans_eager_binop<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
ast::BiAdd => {
if is_float {
FAdd(bcx, lhs, rhs, binop_debug_loc)
} else if is_simd {
Add(bcx, lhs, rhs, binop_debug_loc)
} else {
let (newbcx, res) = with_overflow_check(
bcx, OverflowOp::Add, info, lhs_t, lhs, rhs, binop_debug_loc);
@ -1722,8 +1715,6 @@ fn trans_eager_binop<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
ast::BiSub => {
if is_float {
FSub(bcx, lhs, rhs, binop_debug_loc)
} else if is_simd {
Sub(bcx, lhs, rhs, binop_debug_loc)
} else {
let (newbcx, res) = with_overflow_check(
bcx, OverflowOp::Sub, info, lhs_t, lhs, rhs, binop_debug_loc);
@ -1734,8 +1725,6 @@ fn trans_eager_binop<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
ast::BiMul => {
if is_float {
FMul(bcx, lhs, rhs, binop_debug_loc)
} else if is_simd {
Mul(bcx, lhs, rhs, binop_debug_loc)
} else {
let (newbcx, res) = with_overflow_check(
bcx, OverflowOp::Mul, info, lhs_t, lhs, rhs, binop_debug_loc);
@ -1828,11 +1817,7 @@ fn trans_eager_binop<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
res
}
ast::BiEq | ast::BiNe | ast::BiLt | ast::BiGe | ast::BiLe | ast::BiGt => {
if is_simd {
base::compare_simd_types(bcx, lhs, rhs, intype, val_ty(lhs), op.node, binop_debug_loc)
} else {
base::compare_scalar_types(bcx, lhs, rhs, intype, op.node, binop_debug_loc)
}
base::compare_scalar_types(bcx, lhs, rhs, lhs_t, op.node, binop_debug_loc)
}
_ => {
bcx.tcx().sess.span_bug(binop_expr.span, "unexpected binop");
@ -2533,14 +2518,7 @@ fn build_unchecked_rshift<'blk, 'tcx>(bcx: Block<'blk, 'tcx>,
let rhs = base::cast_shift_expr_rhs(bcx, ast::BinOp_::BiShr, lhs, rhs);
// #1877, #10183: Ensure that input is always valid
let rhs = shift_mask_rhs(bcx, rhs, binop_debug_loc);
let tcx = bcx.tcx();
let is_simd = lhs_t.is_simd();
let intype = if is_simd {
lhs_t.simd_type(tcx)
} else {
lhs_t
};
let is_signed = intype.is_signed();
let is_signed = lhs_t.is_signed();
if is_signed {
AShr(bcx, lhs, rhs, binop_debug_loc)
} else {