Migrate the i686 module to vendor types (#279)
* Migrate `i686::sse` to vendor types * Migrate `i686::sse2` to vendor types * Migrate i686::sse41 to vendor types * Migrate i686::sse42 to vendor types
This commit is contained in:
parent
48a7490711
commit
e77ebf194a
5 changed files with 298 additions and 244 deletions
|
|
@ -185,6 +185,69 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
|
|||
punpckldq(a, b)
|
||||
}
|
||||
|
||||
/// Set packed 16-bit integers in dst with the supplied values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
|
||||
_mm_setr_pi16(e0, e1, e2, e3)
|
||||
}
|
||||
|
||||
/// Set packed 32-bit integers in dst with the supplied values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
|
||||
_mm_setr_pi32(e0, e1)
|
||||
}
|
||||
|
||||
/// Set packed 8-bit integers in dst with the supplied values.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
|
||||
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
|
||||
}
|
||||
|
||||
/// Broadcast 16-bit integer `a` to all elements of dst.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
|
||||
_mm_setr_pi16(a, a, a, a)
|
||||
}
|
||||
|
||||
/// Broadcast 32-bit integer `a` to all elements of dst.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
|
||||
_mm_setr_pi32(a, a)
|
||||
}
|
||||
|
||||
/// Broadcast 8-bit integer `a` to all elements of dst.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
|
||||
_mm_setr_pi8(a, a, a, a, a, a, a, a)
|
||||
}
|
||||
|
||||
/// Set packed 16-bit integers in dst with the supplied values in reverse order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
|
||||
mem::transmute(i16x4::new(e0, e1, e2, e3))
|
||||
}
|
||||
|
||||
/// Set packed 32-bit integers in dst with the supplied values in reverse order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
|
||||
mem::transmute(i32x2::new(e0, e1))
|
||||
}
|
||||
|
||||
/// Set packed 8-bit integers in dst with the supplied values in reverse order.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
pub unsafe fn _mm_setr_pi8(e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8) -> __m64 {
|
||||
mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.mmx.padd.b"]
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
//! `i686` Streaming SIMD Extensions (SSE)
|
||||
|
||||
use v64::*;
|
||||
use core::mem;
|
||||
use x86::*;
|
||||
|
||||
|
|
@ -204,7 +203,7 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 {
|
||||
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
|
||||
cvtpi2ps(a, mem::transmute(b))
|
||||
}
|
||||
|
||||
|
|
@ -215,7 +214,7 @@ pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: i32x2) -> __m128 {
|
||||
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
|
||||
_mm_cvtpi32_ps(a, b)
|
||||
}
|
||||
|
||||
|
|
@ -274,7 +273,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> __m128 {
|
||||
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
|
||||
let c = i586::_mm_setzero_ps();
|
||||
let c = _mm_cvtpi32_ps(c, b);
|
||||
let c = i586::_mm_movelh_ps(c, c);
|
||||
|
|
@ -314,7 +313,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
|
||||
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 }
|
||||
}
|
||||
|
|
@ -326,7 +325,7 @@ pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 {
|
||||
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 {
|
||||
_mm_extract_pi16(a, imm2)
|
||||
}
|
||||
|
||||
|
|
@ -359,7 +358,7 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
|
||||
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
|
||||
pmovmskb(mem::transmute(a))
|
||||
}
|
||||
|
||||
|
|
@ -369,7 +368,7 @@ pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 {
|
||||
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
|
||||
_mm_movemask_pi8(a)
|
||||
}
|
||||
|
||||
|
|
@ -399,7 +398,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 {
|
||||
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
|
||||
mem::transmute(cvttps2pi(a))
|
||||
}
|
||||
|
||||
|
|
@ -408,7 +407,7 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> i32x2 {
|
||||
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
|
||||
_mm_cvttps_pi32(a)
|
||||
}
|
||||
|
||||
|
|
@ -458,107 +457,99 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
|
|||
mod tests {
|
||||
use std::mem;
|
||||
|
||||
use v64::{i16x4, i32x2, i8x8, u16x4, u8x8};
|
||||
use x86::*;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[target_feature = "+avx"]
|
||||
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
|
||||
let r = _mm_cmpeq_ps(a, b);
|
||||
if _mm_movemask_ps(r) != 0b1111 {
|
||||
panic!("{:?} != {:?}", a, b);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_max_pi16() {
|
||||
let a = i16x4::new(-1, 6, -3, 8);
|
||||
let b = i16x4::new(5, -2, 7, -4);
|
||||
let r = i16x4::new(5, 6, 7, 8);
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
let r = _mm_setr_pi16(5, 6, 7, 8);
|
||||
|
||||
assert_eq!(r, i16x4::from(_mm_max_pi16(a.into(), b.into())));
|
||||
assert_eq!(r, i16x4::from(_m_pmaxsw(a.into(), b.into())));
|
||||
assert_eq!(r, _mm_max_pi16(a, b));
|
||||
assert_eq!(r, _m_pmaxsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_max_pu8() {
|
||||
let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8);
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
|
||||
|
||||
assert_eq!(r, u8x8::from(_mm_max_pu8(a.into(), b.into())));
|
||||
assert_eq!(r, u8x8::from(_m_pmaxub(a.into(), b.into())));
|
||||
assert_eq!(r, _mm_max_pu8(a, b));
|
||||
assert_eq!(r, _m_pmaxub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_min_pi16() {
|
||||
let a = i16x4::new(-1, 6, -3, 8);
|
||||
let b = i16x4::new(5, -2, 7, -4);
|
||||
let r = i16x4::new(-1, -2, -3, -4);
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
let r = _mm_setr_pi16(-1, -2, -3, -4);
|
||||
|
||||
assert_eq!(r, i16x4::from(_mm_min_pi16(a.into(), b.into())));
|
||||
assert_eq!(r, i16x4::from(_m_pminsw(a.into(), b.into())));
|
||||
assert_eq!(r, _mm_min_pi16(a, b));
|
||||
assert_eq!(r, _m_pminsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_min_pu8() {
|
||||
let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4);
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
|
||||
|
||||
assert_eq!(r, u8x8::from(_mm_min_pu8(a.into(), b.into())));
|
||||
assert_eq!(r, u8x8::from(_m_pminub(a.into(), b.into())));
|
||||
assert_eq!(r, _mm_min_pu8(a, b));
|
||||
assert_eq!(r, _m_pminub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_mulhi_pu16() {
|
||||
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
|
||||
let r = u16x4::from(_mm_mulhi_pu16(a.into(), b.into()));
|
||||
assert_eq!(r, u16x4::splat(15));
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _mm_mulhi_pu16(a, b);
|
||||
assert_eq!(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_m_pmulhuw() {
|
||||
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
|
||||
let r = _m_pmulhuw(a.into(), b.into());
|
||||
assert_eq!(r, u16x4::splat(15).into());
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _m_pmulhuw(a, b);
|
||||
assert_eq!(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_avg_pu8() {
|
||||
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
|
||||
let r = u8x8::from(_mm_avg_pu8(a.into(), b.into()));
|
||||
assert_eq!(r, u8x8::splat(6));
|
||||
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
|
||||
let r = _mm_avg_pu8(a, b);
|
||||
assert_eq!(r, _mm_set1_pi8(6));
|
||||
|
||||
let r = u8x8::from(_m_pavgb(a.into(), b.into()));
|
||||
assert_eq!(r, u8x8::splat(6));
|
||||
let r = _m_pavgb(a, b);
|
||||
assert_eq!(r, _mm_set1_pi8(6));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_avg_pu16() {
|
||||
let (a, b) = (u16x4::splat(3), u16x4::splat(9));
|
||||
let r = u16x4::from(_mm_avg_pu16(a.into(), b.into()));
|
||||
assert_eq!(r, u16x4::splat(6));
|
||||
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
|
||||
let r = _mm_avg_pu16(a, b);
|
||||
assert_eq!(r, _mm_set1_pi16(6));
|
||||
|
||||
let r = u16x4::from(_m_pavgw(a.into(), b.into()));
|
||||
assert_eq!(r, u16x4::splat(6));
|
||||
let r = _m_pavgw(a, b);
|
||||
assert_eq!(r, _mm_set1_pi16(6));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_sad_pu8() {
|
||||
let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4);
|
||||
let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1);
|
||||
let r = _mm_sad_pu8(a.into(), b.into());
|
||||
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
|
||||
let a = _mm_setr_pi8(255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4);
|
||||
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
|
||||
let r = _mm_sad_pu8(a, b);
|
||||
assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0)));
|
||||
|
||||
let r = _m_psadbw(a.into(), b.into());
|
||||
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
|
||||
let r = _m_psadbw(a, b);
|
||||
assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0)));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpi32_ps() {
|
||||
let a = _mm_setr_ps(0., 0., 3., 4.);
|
||||
let b = i32x2::new(1, 2);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi32_ps(a, b);
|
||||
assert_eq_m128(r, expected);
|
||||
|
|
@ -569,40 +560,40 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpi16_ps() {
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi16_ps(a.into());
|
||||
let r = _mm_cvtpi16_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpu16_ps() {
|
||||
let a = u16x4::new(1, 2, 3, 4);
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpu16_ps(a.into());
|
||||
let r = _mm_cvtpu16_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpi8_ps() {
|
||||
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi8_ps(a.into());
|
||||
let r = _mm_cvtpi8_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpu8_ps() {
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpu8_ps(a.into());
|
||||
let r = _mm_cvtpu8_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtpi32x2_ps() {
|
||||
let a = i32x2::new(1, 2);
|
||||
let b = i32x2::new(3, 4);
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi32x2_ps(a, b);
|
||||
assert_eq_m128(r, expected);
|
||||
|
|
@ -610,24 +601,25 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_maskmove_si64() {
|
||||
let a = i8x8::splat(9);
|
||||
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
|
||||
let mut r = i8x8::splat(0);
|
||||
let a = _mm_set1_pi8(9);
|
||||
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
|
||||
let mut r = _mm_set1_pi8(0);
|
||||
_mm_maskmove_si64(
|
||||
a.into(),
|
||||
mask.into(),
|
||||
a,
|
||||
mask,
|
||||
&mut r as *mut _ as *mut i8,
|
||||
);
|
||||
assert_eq!(r, i8x8::splat(0).replace(2, 9));
|
||||
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
|
||||
assert_eq!(r, e);
|
||||
|
||||
let mut r = i8x8::splat(0);
|
||||
_m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
|
||||
assert_eq!(r, i8x8::splat(0).replace(2, 9));
|
||||
let mut r = _mm_set1_pi8(0);
|
||||
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_extract_pi16() {
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_extract_pi16(a, 0);
|
||||
assert_eq!(r, 1);
|
||||
let r = _mm_extract_pi16(a, 1);
|
||||
|
|
@ -639,21 +631,21 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_insert_pi16() {
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b0));
|
||||
let expected = i16x4::new(0, 2, 3, 4);
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_insert_pi16(a, 0, 0b0);
|
||||
let expected = _mm_setr_pi16(0, 2, 3, 4);
|
||||
assert_eq!(r, expected);
|
||||
let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b10));
|
||||
let expected = i16x4::new(1, 2, 0, 4);
|
||||
let r = _mm_insert_pi16(a, 0, 0b10);
|
||||
let expected = _mm_setr_pi16(1, 2, 0, 4);
|
||||
assert_eq!(r, expected);
|
||||
|
||||
let r = i16x4::from(_m_pinsrw(a.into(), 0, 0b10));
|
||||
let r = _m_pinsrw(a, 0, 0b10);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_movemask_pi8() {
|
||||
let a = i16x4::new(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
|
||||
let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
|
||||
let r = _mm_movemask_pi8(a);
|
||||
assert_eq!(r, 0b10001);
|
||||
|
||||
|
|
@ -663,28 +655,28 @@ mod tests {
|
|||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_shuffle_pi16() {
|
||||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let r = i16x4::from(_mm_shuffle_pi16(a.into(), 0b00_01_01_11));
|
||||
let expected = i16x4::new(4, 2, 2, 1);
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
|
||||
let expected = _mm_setr_pi16(4, 2, 2, 1);
|
||||
assert_eq!(r, expected);
|
||||
|
||||
let r = i16x4::from(_m_pshufw(a.into(), 0b00_01_01_11));
|
||||
let r = _m_pshufw(a, 0b00_01_01_11);
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtps_pi32() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r = i32x2::new(1, 2);
|
||||
let r = _mm_setr_pi32(1, 2);
|
||||
|
||||
assert_eq!(r, i32x2::from(_mm_cvtps_pi32(a)));
|
||||
assert_eq!(r, i32x2::from(_mm_cvt_ps2pi(a)));
|
||||
assert_eq!(r, _mm_cvtps_pi32(a));
|
||||
assert_eq!(r, _mm_cvt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvttps_pi32() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i32x2::new(7, 2);
|
||||
let r = _mm_setr_pi32(7, 2);
|
||||
|
||||
assert_eq!(r, _mm_cvttps_pi32(a));
|
||||
assert_eq!(r, _mm_cvtt_ps2pi(a));
|
||||
|
|
@ -693,14 +685,14 @@ mod tests {
|
|||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtps_pi16() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i16x4::new(7, 2, 3, 4);
|
||||
assert_eq!(r, i16x4::from(_mm_cvtps_pi16(a)));
|
||||
let r = _mm_setr_pi16(7, 2, 3, 4);
|
||||
assert_eq!(r, _mm_cvtps_pi16(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
unsafe fn test_mm_cvtps_pi8() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
|
||||
assert_eq!(r, i8x8::from(_mm_cvtps_pi8(a)));
|
||||
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
|
||||
assert_eq!(r, _mm_cvtps_pi8(a));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
|
||||
|
||||
use core::mem;
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
|
||||
use simd_llvm::simd_extract;
|
||||
use x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
|
@ -22,7 +23,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(pmuludq))]
|
||||
pub unsafe fn _mm_mul_su32(a: u32x2, b: u32x2) -> __m64 {
|
||||
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
|
||||
pmuludq(mem::transmute(a), mem::transmute(b))
|
||||
}
|
||||
|
||||
|
|
@ -41,8 +42,8 @@ pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2pd))]
|
||||
pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
|
||||
cvtpi2pd(mem::transmute(a))
|
||||
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
|
||||
cvtpi2pd(a)
|
||||
}
|
||||
|
||||
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
|
||||
|
|
@ -50,8 +51,8 @@ pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
|
||||
i64x2::new(mem::transmute(e0), mem::transmute(e1))
|
||||
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(e1), mem::transmute(e0))
|
||||
}
|
||||
|
||||
/// Initializes both values in a 128-bit vector of [2 x i64] with the
|
||||
|
|
@ -59,8 +60,8 @@ pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
|
||||
i64x2::new(mem::transmute(a), mem::transmute(a))
|
||||
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(a), mem::transmute(a))
|
||||
}
|
||||
|
||||
/// Constructs a 128-bit integer vector, initialized in reverse order
|
||||
|
|
@ -68,8 +69,8 @@ pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
|
||||
i64x2::new(mem::transmute(e1), mem::transmute(e0))
|
||||
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
|
||||
}
|
||||
|
||||
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
|
||||
|
|
@ -78,8 +79,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
|
|||
#[target_feature = "+sse2"]
|
||||
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
|
||||
mem::transmute(a.extract(0))
|
||||
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
|
||||
mem::transmute(simd_extract::<_, i64>(a, 0))
|
||||
}
|
||||
|
||||
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
|
||||
|
|
@ -88,8 +89,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
|
|||
#[target_feature = "+sse2"]
|
||||
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
|
||||
i64x2::new(mem::transmute(a), 0)
|
||||
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
|
||||
_mm_set_epi64x(0, mem::transmute(a))
|
||||
}
|
||||
|
||||
/// Converts the two double-precision floating-point elements of a
|
||||
|
|
@ -98,8 +99,8 @@ pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(cvtpd2pi))]
|
||||
pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
|
||||
mem::transmute(cvtpd2pi(a))
|
||||
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
|
||||
cvtpd2pi(a)
|
||||
}
|
||||
|
||||
/// Converts the two double-precision floating-point elements of a
|
||||
|
|
@ -110,8 +111,8 @@ pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
#[cfg_attr(test, assert_instr(cvttpd2pi))]
|
||||
pub unsafe fn _mm_cvttpd_pi32(a: f64x2) -> i32x2 {
|
||||
mem::transmute(cvttpd2pi(a))
|
||||
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
|
||||
cvttpd2pi(a)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
|
|
@ -123,11 +124,11 @@ extern "C" {
|
|||
#[link_name = "llvm.x86.mmx.psub.q"]
|
||||
fn psubq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvtpi2pd"]
|
||||
fn cvtpi2pd(a: __m64) -> f64x2;
|
||||
fn cvtpi2pd(a: __m64) -> __m128d;
|
||||
#[link_name = "llvm.x86.sse.cvtpd2pi"]
|
||||
fn cvtpd2pi(a: f64x2) -> __m64;
|
||||
fn cvtpd2pi(a: __m128d) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvttpd2pi"]
|
||||
fn cvttpd2pi(a: f64x2) -> __m64;
|
||||
fn cvttpd2pi(a: __m128d) -> __m64;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -136,74 +137,72 @@ mod tests {
|
|||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use v128::*;
|
||||
use v64::*;
|
||||
use x86::i686::sse2;
|
||||
use x86::*;
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_add_si64() {
|
||||
unsafe fn test_mm_add_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
let expected = 3i64;
|
||||
let r = sse2::_mm_add_si64(mem::transmute(a), mem::transmute(b));
|
||||
let r = _mm_add_si64(mem::transmute(a), mem::transmute(b));
|
||||
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_mul_su32() {
|
||||
let a = u32x2::new(1, 2);
|
||||
let b = u32x2::new(3, 4);
|
||||
unsafe fn test_mm_mul_su32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
let expected = 3u64;
|
||||
let r = sse2::_mm_mul_su32(a, b);
|
||||
let r = _mm_mul_su32(a, b);
|
||||
assert_eq!(r, mem::transmute(expected));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_sub_si64() {
|
||||
unsafe fn test_mm_sub_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
let expected = -1i64;
|
||||
let r = sse2::_mm_sub_si64(mem::transmute(a), mem::transmute(b));
|
||||
let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b));
|
||||
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_cvtpi32_pd() {
|
||||
let a = i32x2::new(1, 2);
|
||||
let expected = f64x2::new(1., 2.);
|
||||
let r = sse2::_mm_cvtpi32_pd(a);
|
||||
assert_eq!(r, expected);
|
||||
unsafe fn test_mm_cvtpi32_pd() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let expected = _mm_setr_pd(1., 2.);
|
||||
let r = _mm_cvtpi32_pd(a);
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_set_epi64() {
|
||||
unsafe fn test_mm_set_epi64() {
|
||||
let r =
|
||||
sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, i64x2::new(2, 1));
|
||||
_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, _mm_setr_epi64x(2, 1));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_set1_epi64() {
|
||||
let r = sse2::_mm_set1_epi64(mem::transmute(1i64));
|
||||
assert_eq!(r, i64x2::new(1, 1));
|
||||
unsafe fn test_mm_set1_epi64() {
|
||||
let r = _mm_set1_epi64(mem::transmute(1i64));
|
||||
assert_eq!(r, _mm_setr_epi64x(1, 1));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_setr_epi64() {
|
||||
unsafe fn test_mm_setr_epi64() {
|
||||
let r =
|
||||
sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, i64x2::new(1, 2));
|
||||
_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq!(r, _mm_setr_epi64x(1, 2));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_movepi64_pi64() {
|
||||
let r = sse2::_mm_movepi64_pi64(i64x2::new(5, 0));
|
||||
assert_eq!(r, mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0)));
|
||||
unsafe fn test_mm_movepi64_pi64() {
|
||||
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
|
||||
assert_eq!(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_movpi64_epi64() {
|
||||
let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(
|
||||
unsafe fn test_mm_movpi64_epi64() {
|
||||
let r = _mm_movpi64_epi64(_mm_setr_pi8(
|
||||
5,
|
||||
0,
|
||||
0,
|
||||
|
|
@ -212,27 +211,27 @@ mod tests {
|
|||
0,
|
||||
0,
|
||||
0,
|
||||
)));
|
||||
assert_eq!(r, i64x2::new(5, 0));
|
||||
));
|
||||
assert_eq!(r, _mm_setr_epi64x(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_cvtpd_pi32() {
|
||||
let a = f64x2::new(5., 0.);
|
||||
let r = sse2::_mm_cvtpd_pi32(a);
|
||||
assert_eq!(r, i32x2::new(5, 0));
|
||||
unsafe fn test_mm_cvtpd_pi32() {
|
||||
let a = _mm_setr_pd(5., 0.);
|
||||
let r = _mm_cvtpd_pi32(a);
|
||||
assert_eq!(r, _mm_setr_pi32(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
unsafe fn _mm_cvttpd_pi32() {
|
||||
unsafe fn test_mm_cvttpd_pi32() {
|
||||
use std::{f64, i32};
|
||||
|
||||
let a = f64x2::new(5., 0.);
|
||||
let r = sse2::_mm_cvttpd_pi32(a);
|
||||
assert_eq!(r, i32x2::new(5, 0));
|
||||
let a = _mm_setr_pd(5., 0.);
|
||||
let r = _mm_cvttpd_pi32(a);
|
||||
assert_eq!(r, _mm_setr_pi32(5, 0));
|
||||
|
||||
let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
|
||||
let r = sse2::_mm_cvttpd_pi32(a);
|
||||
assert_eq!(r, i32x2::new(i32::MIN, i32::MIN));
|
||||
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
|
||||
let r = _mm_cvttpd_pi32(a);
|
||||
assert_eq!(r, _mm_setr_pi32(i32::MIN, i32::MIN));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,11 +9,11 @@ use stdsimd_test::assert_instr;
|
|||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.sse41.ptestz"]
|
||||
fn ptestz(a: i64x2, mask: i64x2) -> i32;
|
||||
fn ptestz(a: __m128i, mask: __m128i) -> i32;
|
||||
#[link_name = "llvm.x86.sse41.ptestc"]
|
||||
fn ptestc(a: i64x2, mask: i64x2) -> i32;
|
||||
fn ptestc(a: __m128i, mask: __m128i) -> i32;
|
||||
#[link_name = "llvm.x86.sse41.ptestnzc"]
|
||||
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
|
||||
fn ptestnzc(a: __m128i, mask: __m128i) -> i32;
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
|
|
@ -33,7 +33,7 @@ extern "C" {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestz(i64x2::from(a), i64x2::from(mask))
|
||||
ptestz(a, mask)
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
|
|
@ -53,7 +53,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestc(i64x2::from(a), i64x2::from(mask))
|
||||
ptestc(a, mask)
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are
|
||||
|
|
@ -73,7 +73,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestnzc(i64x2::from(a), i64x2::from(mask))
|
||||
ptestnzc(a, mask)
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
|
|
@ -138,104 +138,103 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdsimd_test::simd_test;
|
||||
use x86::i686::sse41;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testz_si128() {
|
||||
let a = i8x16::splat(1);
|
||||
let mask = i8x16::splat(0);
|
||||
let r = sse41::_mm_testz_si128(a.into(), mask.into());
|
||||
unsafe fn test_mm_testz_si128() {
|
||||
let a = _mm_set1_epi8(1);
|
||||
let mask = _mm_set1_epi8(0);
|
||||
let r = _mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b110);
|
||||
let r = sse41::_mm_testz_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b110);
|
||||
let r = _mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(0b011);
|
||||
let mask = i8x16::splat(0b100);
|
||||
let r = sse41::_mm_testz_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b011);
|
||||
let mask = _mm_set1_epi8(0b100);
|
||||
let r = _mm_testz_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testc_si128() {
|
||||
let a = i8x16::splat(-1);
|
||||
let mask = i8x16::splat(0);
|
||||
let r = sse41::_mm_testc_si128(a.into(), mask.into());
|
||||
unsafe fn test_mm_testc_si128() {
|
||||
let a = _mm_set1_epi8(-1);
|
||||
let mask = _mm_set1_epi8(0);
|
||||
let r = _mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b110);
|
||||
let r = sse41::_mm_testc_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b110);
|
||||
let r = _mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b100);
|
||||
let r = sse41::_mm_testc_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b100);
|
||||
let r = _mm_testc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_testnzc_si128() {
|
||||
let a = i8x16::splat(0);
|
||||
let mask = i8x16::splat(1);
|
||||
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
|
||||
unsafe fn test_mm_testnzc_si128() {
|
||||
let a = _mm_set1_epi8(0);
|
||||
let mask = _mm_set1_epi8(1);
|
||||
let r = _mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(-1);
|
||||
let mask = i8x16::splat(0);
|
||||
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(-1);
|
||||
let mask = _mm_set1_epi8(0);
|
||||
let r = _mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b110);
|
||||
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b110);
|
||||
let r = _mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b101);
|
||||
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b101);
|
||||
let r = _mm_testnzc_si128(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_test_all_zeros() {
|
||||
let a = i8x16::splat(1);
|
||||
let mask = i8x16::splat(0);
|
||||
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
|
||||
unsafe fn test_mm_test_all_zeros() {
|
||||
let a = _mm_set1_epi8(1);
|
||||
let mask = _mm_set1_epi8(0);
|
||||
let r = _mm_test_all_zeros(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b110);
|
||||
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b110);
|
||||
let r = _mm_test_all_zeros(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(0b011);
|
||||
let mask = i8x16::splat(0b100);
|
||||
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b011);
|
||||
let mask = _mm_set1_epi8(0b100);
|
||||
let r = _mm_test_all_zeros(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_test_all_ones() {
|
||||
let a = i8x16::splat(-1);
|
||||
let r = sse41::_mm_test_all_ones(a.into());
|
||||
unsafe fn test_mm_test_all_ones() {
|
||||
let a = _mm_set1_epi8(-1);
|
||||
let r = _mm_test_all_ones(a);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let r = sse41::_mm_test_all_ones(a.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let r = _mm_test_all_ones(a);
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test = "sse4.1"]
|
||||
unsafe fn _mm_test_mix_ones_zeros() {
|
||||
let a = i8x16::splat(0);
|
||||
let mask = i8x16::splat(1);
|
||||
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
|
||||
unsafe fn test_mm_test_mix_ones_zeros() {
|
||||
let a = _mm_set1_epi8(0);
|
||||
let mask = _mm_set1_epi8(1);
|
||||
let r = _mm_test_mix_ones_zeros(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(-1);
|
||||
let mask = i8x16::splat(0);
|
||||
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(-1);
|
||||
let mask = _mm_set1_epi8(0);
|
||||
let r = _mm_test_mix_ones_zeros(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b110);
|
||||
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b110);
|
||||
let r = _mm_test_mix_ones_zeros(a, mask);
|
||||
assert_eq!(r, 1);
|
||||
let a = i8x16::splat(0b101);
|
||||
let mask = i8x16::splat(0b101);
|
||||
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
|
||||
let a = _mm_set1_epi8(0b101);
|
||||
let mask = _mm_set1_epi8(0b101);
|
||||
let r = _mm_test_mix_ones_zeros(a, mask);
|
||||
assert_eq!(r, 0);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
|
||||
|
||||
use simd_llvm::*;
|
||||
use v128::*;
|
||||
use x86::*;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
|
@ -10,22 +12,21 @@ use stdsimd_test::assert_instr;
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse4.2"]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtq))]
|
||||
pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
|
||||
a.gt(b)
|
||||
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use v128::*;
|
||||
use x86::i686::sse42;
|
||||
use x86::*;
|
||||
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "sse4.2"]
|
||||
unsafe fn _mm_cmpgt_epi64() {
|
||||
let a = i64x2::splat(0x00).replace(1, 0x2a);
|
||||
let b = i64x2::splat(0x00);
|
||||
let i = sse42::_mm_cmpgt_epi64(a, b);
|
||||
assert_eq!(i, i64x2::new(0x00, 0xffffffffffffffffu64 as i64));
|
||||
unsafe fn test_mm_cmpgt_epi64() {
|
||||
let a = _mm_setr_epi64x(0, 0x2a);
|
||||
let b = _mm_set1_epi64x(0x00);
|
||||
let i = _mm_cmpgt_epi64(a, b);
|
||||
assert_eq!(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue