Add _mm_testz_si128, _mm_testc_si128 and _mm_testnzc_si128

This should work for any 128-bit vector type, but for now it only accepts `i64x2`.
This commit is contained in:
André Oliveira 2017-11-06 17:24:55 +00:00 committed by gnzlbg
parent 38f6087b9a
commit 4ce80f138b

View file

@ -402,7 +402,6 @@ pub unsafe fn _mm_mul_epi32(a: i32x4, b:i32x4) -> i64x2 {
pmuldq(a, b)
}
/// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers,
/// and return the low 32 bits of the intermediate integers.
#[inline(always)]
@ -412,6 +411,27 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
a * b
}
/// Tests whether the bits of `a` selected by `mask` are all zeros.
///
/// Forwards both operands to the `llvm.x86.sse41.ptestz` intrinsic.
///
/// Returns `1` if the bitwise AND of `a` and `mask` is zero across the whole
/// 128-bit value, and `0` otherwise.
///
/// NOTE(review): the function is compiled with the `sse4.1` target feature
/// enabled; presumably the caller must ensure the CPU supports it - confirm.
#[inline(always)]
#[target_feature = "+sse4.1"]
// In test builds, check that this wrapper lowers to the `ptest` instruction.
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
ptestz(a, mask)
}
/// Tests whether the bits of `a` selected by `mask` are all ones.
///
/// Forwards both operands to the `llvm.x86.sse41.ptestc` intrinsic.
///
/// Returns `1` if every bit set in `mask` is also set in `a` (that is, the
/// bitwise AND of `!a` and `mask` is zero), and `0` otherwise.
///
/// NOTE(review): the function is compiled with the `sse4.1` target feature
/// enabled; presumably the caller must ensure the CPU supports it - confirm.
#[inline(always)]
#[target_feature = "+sse4.1"]
// In test builds, check that this wrapper lowers to the `ptest` instruction.
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
ptestc(a, mask)
}
/// Tests whether the bits of `a` selected by `mask` are neither all zeros
/// nor all ones.
///
/// Forwards both operands to the `llvm.x86.sse41.ptestnzc` intrinsic.
///
/// Returns `1` if both the bitwise AND of `a` and `mask` and the bitwise AND
/// of `!a` and `mask` are non-zero, and `0` otherwise.
///
/// NOTE(review): the function is compiled with the `sse4.1` target feature
/// enabled; presumably the caller must ensure the CPU supports it - confirm.
#[inline(always)]
#[target_feature = "+sse4.1"]
// In test builds, check that this wrapper lowers to the `ptest` instruction.
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
ptestnzc(a, mask)
}
/// Returns the dot product of two f64x2 vectors.
///
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
@ -724,6 +744,12 @@ extern "C" {
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
#[link_name = "llvm.x86.sse41.pmuldq"]
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
#[link_name = "llvm.x86.sse41.ptestz"]
fn ptestz(a: i64x2, mask: i64x2) -> i32;
#[link_name = "llvm.x86.sse41.ptestc"]
fn ptestc(a: i64x2, mask: i64x2) -> i32;
#[link_name = "llvm.x86.sse41.ptestnzc"]
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
#[link_name = "llvm.x86.sse41.dppd"]
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
#[link_name = "llvm.x86.sse41.dpps"]
@ -1175,6 +1201,58 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_testz_si128() {
    // Each row is (lane value of `a`, lane value of `mask`, expected result).
    // Both operands are splatted across the two 64-bit lanes.
    let cases: [(i64, i64, i32); 3] = [
        // An empty mask selects no bits, so the AND is trivially zero.
        (1, 0, 1),
        // Bit 2 is set in both operands, so the AND is non-zero.
        (0b101, 0b110, 0),
        // The operands share no set bits.
        (0b011, 0b100, 1),
    ];
    for &(lane, bits, expected) in cases.iter() {
        let r = sse41::_mm_testz_si128(i64x2::splat(lane), i64x2::splat(bits));
        assert_eq!(r, expected);
    }
}
#[simd_test = "sse4.1"]
unsafe fn _mm_testc_si128() {
    // Each row is (lane value of `a`, lane value of `mask`, expected result).
    // Both operands are splatted across the two 64-bit lanes.
    let cases: [(i64, i64, i32); 3] = [
        // All bits of `a` are set, so any mask is fully covered.
        (-1, 0, 1),
        // Bit 1 of the mask is missing from `a`.
        (0b101, 0b110, 0),
        // The single mask bit (bit 2) is present in `a`.
        (0b101, 0b100, 1),
    ];
    for &(lane, bits, expected) in cases.iter() {
        let r = sse41::_mm_testc_si128(i64x2::splat(lane), i64x2::splat(bits));
        assert_eq!(r, expected);
    }
}
#[simd_test = "sse4.1"]
unsafe fn _mm_testnzc_si128() {
    // Each row is (lane value of `a`, lane value of `mask`, expected result).
    // Both operands are splatted across the two 64-bit lanes.
    let cases: [(i64, i64, i32); 4] = [
        // `a` is zero, so none of the masked bits are set.
        (0, 1, 0),
        // The mask is empty, so there are no bits to be "mixed".
        (-1, 0, 0),
        // Bit 2 of the mask is set in `a` while bit 1 is not: mixed.
        (0b101, 0b110, 1),
        // Every mask bit is set in `a`, so the selection is all ones.
        (0b101, 0b101, 0),
    ];
    for &(lane, bits, expected) in cases.iter() {
        let r = sse41::_mm_testnzc_si128(i64x2::splat(lane), i64x2::splat(bits));
        assert_eq!(r, expected);
    }
}
#[simd_test = "sse4.1"]
unsafe fn _mm_dp_pd() {
let a = f64x2::new(2.0, 3.0);