Add _mm_testz_si128, _mm_testc_si128 and _mm_testnzc_si128
These intrinsics should work for any 128-bit vector type, but for now they only accept `i64x2`.
This commit is contained in:
parent
38f6087b9a
commit
4ce80f138b
1 changed files with 79 additions and 1 deletions
|
|
@ -402,7 +402,6 @@ pub unsafe fn _mm_mul_epi32(a: i32x4, b:i32x4) -> i64x2 {
|
|||
pmuldq(a, b)
|
||||
}
|
||||
|
||||
|
||||
/// Multiply the packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers,
|
||||
/// and return the low 32 bits of the intermediate integers.
|
||||
#[inline(always)]
|
||||
|
|
@ -412,6 +411,27 @@ pub unsafe fn _mm_mullo_epi32 (a: i32x4, b:i32x4) -> i32x4 {
|
|||
a * b
|
||||
}
|
||||
|
||||
/// Tests whether the bits of `a` selected by `mask` are all zero.
///
/// Forwards both operands unchanged to the `ptestz` binding (LLVM's
/// `llvm.x86.sse41.ptestz`), which reports the zero-flag outcome of the
/// SSE4.1 `ptest` instruction: the result is `1` when `a & mask` is zero
/// across all 128 bits, and `0` otherwise.
///
/// Only `i64x2` operands are accepted for now. The function is compiled
/// with the `sse4.1` target feature, so the caller is responsible for only
/// invoking it on a CPU that supports SSE4.1.
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testz_si128(a: i64x2, mask: i64x2) -> i32 {
|
||||
ptestz(a, mask)
|
||||
}
|
||||
|
||||
/// Tests whether every bit set in `mask` is also set in `a`.
///
/// Forwards both operands unchanged to the `ptestc` binding (LLVM's
/// `llvm.x86.sse41.ptestc`), which reports the carry-flag outcome of the
/// SSE4.1 `ptest` instruction: the result is `1` when `!a & mask` is zero
/// across all 128 bits, and `0` otherwise. An all-zero `mask` therefore
/// always yields `1`.
///
/// Only `i64x2` operands are accepted for now. The function is compiled
/// with the `sse4.1` target feature, so the caller is responsible for only
/// invoking it on a CPU that supports SSE4.1.
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testc_si128(a: i64x2, mask: i64x2) -> i32 {
|
||||
ptestc(a, mask)
|
||||
}
|
||||
|
||||
/// Tests whether the bits of `a` selected by `mask` are a mix of ones and
/// zeros.
///
/// Forwards both operands unchanged to the `ptestnzc` binding (LLVM's
/// `llvm.x86.sse41.ptestnzc`), which reports whether the SSE4.1 `ptest`
/// instruction leaves both the zero flag and the carry flag clear: the
/// result is `1` when `a & mask` is non-zero and `!a & mask` is also
/// non-zero, and `0` otherwise.
///
/// Only `i64x2` operands are accepted for now. The function is compiled
/// with the `sse4.1` target feature, so the caller is responsible for only
/// invoking it on a CPU that supports SSE4.1.
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testnzc_si128(a: i64x2, mask: i64x2) -> i32 {
|
||||
ptestnzc(a, mask)
|
||||
}
|
||||
|
||||
/// Returns the dot product of two f64x2 vectors.
|
||||
///
|
||||
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
|
||||
|
|
@ -724,6 +744,12 @@ extern "C" {
|
|||
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
|
||||
// Binding for LLVM's `llvm.x86.sse41.pmuldq` intrinsic; called by
// `_mm_mul_epi32`.
#[link_name = "llvm.x86.sse41.pmuldq"]
|
||||
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
|
||||
// Binding for LLVM's `llvm.x86.sse41.ptestz` intrinsic; called by
// `_mm_testz_si128`. Yields an `i32` flag result for the pair (`a`, `mask`).
#[link_name = "llvm.x86.sse41.ptestz"]
|
||||
fn ptestz(a: i64x2, mask: i64x2) -> i32;
|
||||
// Binding for LLVM's `llvm.x86.sse41.ptestc` intrinsic; called by
// `_mm_testc_si128`. Yields an `i32` flag result for the pair (`a`, `mask`).
#[link_name = "llvm.x86.sse41.ptestc"]
|
||||
fn ptestc(a: i64x2, mask: i64x2) -> i32;
|
||||
// Binding for LLVM's `llvm.x86.sse41.ptestnzc` intrinsic; called by
// `_mm_testnzc_si128`. Yields an `i32` flag result for the pair (`a`, `mask`).
#[link_name = "llvm.x86.sse41.ptestnzc"]
|
||||
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
|
||||
// Binding for LLVM's `llvm.x86.sse41.dppd` intrinsic.
// NOTE(review): presumably backs the `f64x2` dot-product intrinsic
// (`_mm_dp_pd`) — its caller is not visible here, confirm.
#[link_name = "llvm.x86.sse41.dppd"]
|
||||
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse41.dpps"]
|
||||
|
|
@ -1175,6 +1201,58 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testz_si128() {
|
||||
let a = i64x2::splat(1);
|
||||
let mask = i64x2::splat(0);
|
||||
let r = sse41::_mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i64x2::splat(0b101);
|
||||
let mask = i64x2::splat(0b110);
|
||||
let r = sse41::_mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i64x2::splat(0b011);
|
||||
let mask = i64x2::splat(0b100);
|
||||
let r = sse41::_mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testc_si128() {
|
||||
let a = i64x2::splat(-1);
|
||||
let mask = i64x2::splat(0);
|
||||
let r = sse41::_mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i64x2::splat(0b101);
|
||||
let mask = i64x2::splat(0b110);
|
||||
let r = sse41::_mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i64x2::splat(0b101);
|
||||
let mask = i64x2::splat(0b100);
|
||||
let r = sse41::_mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testnzc_si128() {
|
||||
let a = i64x2::splat(0);
|
||||
let mask = i64x2::splat(1);
|
||||
let r = sse41::_mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i64x2::splat(-1);
|
||||
let mask = i64x2::splat(0);
|
||||
let r = sse41::_mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i64x2::splat(0b101);
|
||||
let mask = i64x2::splat(0b110);
|
||||
let r = sse41::_mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i64x2::splat(0b101);
|
||||
let mask = i64x2::splat(0b101);
|
||||
let r = sse41::_mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_dp_pd() {
|
||||
let a = f64x2::new(2.0, 3.0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue