Add _mm_packus_epi32 and _mm_cmpeq_epi64 intrinsics
This commit is contained in:
parent
a6d9d0c100
commit
d6c990967b
1 changed file with 41 additions and 4 deletions
|
|
@ -211,7 +211,7 @@ pub unsafe fn _mm_insert_epi64(a: i64x2, i: i64, imm8: u8) -> i64x2 {
|
|||
/// values in dst.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxsb, imm8 = 0))]
|
||||
#[cfg_attr(test, assert_instr(pmaxsb))]
|
||||
pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
|
||||
pmaxsb(a, b)
|
||||
}
|
||||
|
|
@ -220,7 +220,7 @@ pub unsafe fn _mm_max_epi8(a: i8x16, b: i8x16) -> i8x16 {
|
|||
/// maximum.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxuw, imm8 = 0))]
|
||||
#[cfg_attr(test, assert_instr(pmaxuw))]
|
||||
pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
|
||||
pmaxuw(a, b)
|
||||
}
|
||||
|
|
@ -229,7 +229,7 @@ pub unsafe fn _mm_max_epu16(a: u16x8, b: u16x8) -> u16x8 {
|
|||
/// values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxsd, imm8 = 0))]
|
||||
#[cfg_attr(test, assert_instr(pmaxsd))]
|
||||
pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
||||
pmaxsd(a, b)
|
||||
}
|
||||
|
|
@ -238,11 +238,28 @@ pub unsafe fn _mm_max_epi32(a: i32x4, b: i32x4) -> i32x4 {
|
|||
/// maximum values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pmaxud, imm8 = 0))]
|
||||
#[cfg_attr(test, assert_instr(pmaxud))]
|
||||
pub unsafe fn _mm_max_epu32(a: u32x4, b: u32x4) -> u32x4 {
|
||||
pmaxud(a, b)
|
||||
}
|
||||
|
||||
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers using unsigned saturation
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(packusdw))]
|
||||
pub unsafe fn _mm_packus_epi32(a: i32x4, b: i32x4) -> u16x8 {
|
||||
packusdw(a, b)
|
||||
}
|
||||
|
||||
/// Compare packed 64-bit integers in `a` and `b` for equality
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(pcmpeqq))]
|
||||
pub unsafe fn _mm_cmpeq_epi64(a: i64x2, b: i64x2) -> i64x2 {
|
||||
a.eq(b)
|
||||
}
|
||||
|
||||
|
||||
/// Returns the dot product of two f64x2 vectors.
|
||||
///
|
||||
/// `imm8[1:0]` is the broadcast mask, and `imm8[5:4]` is the condition mask.
|
||||
|
|
@ -510,6 +527,8 @@ extern "C" {
|
|||
fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.sse41.pmaxud"]
|
||||
fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
|
||||
#[link_name = "llvm.x86.sse41.packusdw"]
|
||||
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
|
||||
#[link_name = "llvm.x86.sse41.dppd"]
|
||||
fn dppd(a: f64x2, b: f64x2, imm8: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse41.dpps"]
|
||||
|
|
@ -723,6 +742,24 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_packus_epi32() {
|
||||
let a = i32x4::new(1, 2, 3, 4);
|
||||
let b = i32x4::new(-1, -2, -3, -4);
|
||||
let r = sse41::_mm_packus_epi32(a, b);
|
||||
let e = u16x8::new(1, 2, 3, 4, 0, 0, 0, 0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_cmpeq_epi64() {
|
||||
let a = i64x2::new(0, 1);
|
||||
let b = i64x2::new(0, 0);
|
||||
let r = sse41::_mm_cmpeq_epi64(a, b);
|
||||
let e = i64x2::new(0xFFFFFFFFFFFFFFFF, 0x0);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_dp_pd() {
|
||||
let a = f64x2::new(2.0, 3.0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue