Migrate the i686 module to vendor types (#279)

* Migrate `i686::sse` to vendor types

* Migrate `i686::sse2` to vendor types

* Migrate i686::sse41 to vendor types

* Migrate i686::sse42 to vendor types
This commit is contained in:
Alex Crichton 2018-01-12 14:08:20 -06:00 committed by GitHub
parent 48a7490711
commit e77ebf194a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 298 additions and 244 deletions

View file

@ -185,6 +185,69 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
punpckldq(a, b)
}
/// Set packed 16-bit integers in the returned `__m64` with the supplied
/// values.
///
/// The arguments are listed from `e3` down to `e0`. They are forwarded to
/// `_mm_setr_pi16` in the opposite order, so `e0` is passed first and `e3`
/// last.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
_mm_setr_pi16(e0, e1, e2, e3)
}
/// Set packed 32-bit integers in the returned `__m64` with the supplied
/// values.
///
/// The arguments are listed as `e1` then `e0`. They are forwarded to
/// `_mm_setr_pi32` in the opposite order, so `e0` is passed first and `e1`
/// last.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
_mm_setr_pi32(e0, e1)
}
/// Set packed 8-bit integers in the returned `__m64` with the supplied
/// values.
///
/// The arguments are listed from `e7` down to `e0`. They are forwarded to
/// `_mm_setr_pi8` in the opposite order, so `e0` is passed first and `e7`
/// last.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}
/// Broadcast the 16-bit integer `a` to all elements of the returned `__m64`.
///
/// Implemented by passing `a` as each of the four arguments of
/// `_mm_setr_pi16`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
_mm_setr_pi16(a, a, a, a)
}
/// Broadcast the 32-bit integer `a` to all elements of the returned `__m64`.
///
/// Implemented by passing `a` as both arguments of `_mm_setr_pi32`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
_mm_setr_pi32(a, a)
}
/// Broadcast the 8-bit integer `a` to all elements of the returned `__m64`.
///
/// Implemented by passing `a` as each of the eight arguments of
/// `_mm_setr_pi8`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
_mm_setr_pi8(a, a, a, a, a, a, a, a)
}
/// Builds an `__m64` from four 16-bit integers supplied in reverse order
/// relative to `_mm_set_pi16`, i.e. starting with `e0`.
///
/// The values are handed to `i16x4::new` in the order given and the
/// resulting vector is reinterpreted bit-for-bit as `__m64`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
    let lanes = i16x4::new(e0, e1, e2, e3);
    // Same 64 bits, viewed as the opaque vendor type.
    mem::transmute(lanes)
}
/// Builds an `__m64` from two 32-bit integers supplied in reverse order
/// relative to `_mm_set_pi32`, i.e. starting with `e0`.
///
/// The values are handed to `i32x2::new` in the order given and the
/// resulting vector is reinterpreted bit-for-bit as `__m64`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
    let lanes = i32x2::new(e0, e1);
    // Same 64 bits, viewed as the opaque vendor type.
    mem::transmute(lanes)
}
/// Builds an `__m64` from eight 8-bit integers supplied in reverse order
/// relative to `_mm_set_pi8`, i.e. starting with `e0`.
///
/// The values are handed to `i8x8::new` in the order given and the
/// resulting vector is reinterpreted bit-for-bit as `__m64`.
#[inline(always)]
#[target_feature = "+mmx"]
pub unsafe fn _mm_setr_pi8(
    e0: i8,
    e1: i8,
    e2: i8,
    e3: i8,
    e4: i8,
    e5: i8,
    e6: i8,
    e7: i8,
) -> __m64 {
    let lanes = i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
    // Same 64 bits, viewed as the opaque vendor type.
    mem::transmute(lanes)
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.padd.b"]

View file

@ -1,6 +1,5 @@
//! `i686` Streaming SIMD Extensions (SSE)
use v64::*;
use core::mem;
use x86::*;
@ -204,7 +203,7 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 {
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
cvtpi2ps(a, mem::transmute(b))
}
@ -215,7 +214,7 @@ pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: i32x2) -> __m128 {
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
_mm_cvtpi32_ps(a, b)
}
@ -274,7 +273,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> __m128 {
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
let c = i586::_mm_setzero_ps();
let c = _mm_cvtpi32_ps(c, b);
let c = i586::_mm_movelh_ps(c, c);
@ -314,7 +313,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
macro_rules! call {
($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 }
}
@ -326,7 +325,7 @@ pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 {
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 {
_mm_extract_pi16(a, imm2)
}
@ -359,7 +358,7 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
pmovmskb(mem::transmute(a))
}
@ -369,7 +368,7 @@ pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 {
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
_mm_movemask_pi8(a)
}
@ -399,7 +398,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 {
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
mem::transmute(cvttps2pi(a))
}
@ -408,7 +407,7 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> i32x2 {
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
_mm_cvttps_pi32(a)
}
@ -458,107 +457,99 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
mod tests {
use std::mem;
use v64::{i16x4, i32x2, i8x8, u16x4, u8x8};
use x86::*;
use stdsimd_test::simd_test;
#[target_feature = "+avx"]
unsafe fn assert_eq_m128(a: __m128, b: __m128) {
let r = _mm_cmpeq_ps(a, b);
if _mm_movemask_ps(r) != 0b1111 {
panic!("{:?} != {:?}", a, b);
}
}
#[simd_test = "sse"]
unsafe fn test_mm_max_pi16() {
let a = i16x4::new(-1, 6, -3, 8);
let b = i16x4::new(5, -2, 7, -4);
let r = i16x4::new(5, 6, 7, 8);
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(5, 6, 7, 8);
assert_eq!(r, i16x4::from(_mm_max_pi16(a.into(), b.into())));
assert_eq!(r, i16x4::from(_m_pmaxsw(a.into(), b.into())));
assert_eq!(r, _mm_max_pi16(a, b));
assert_eq!(r, _m_pmaxsw(a, b));
}
#[simd_test = "sse"]
unsafe fn test_mm_max_pu8() {
let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8);
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8);
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
assert_eq!(r, u8x8::from(_mm_max_pu8(a.into(), b.into())));
assert_eq!(r, u8x8::from(_m_pmaxub(a.into(), b.into())));
assert_eq!(r, _mm_max_pu8(a, b));
assert_eq!(r, _m_pmaxub(a, b));
}
#[simd_test = "sse"]
unsafe fn test_mm_min_pi16() {
let a = i16x4::new(-1, 6, -3, 8);
let b = i16x4::new(5, -2, 7, -4);
let r = i16x4::new(-1, -2, -3, -4);
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(-1, -2, -3, -4);
assert_eq!(r, i16x4::from(_mm_min_pi16(a.into(), b.into())));
assert_eq!(r, i16x4::from(_m_pminsw(a.into(), b.into())));
assert_eq!(r, _mm_min_pi16(a, b));
assert_eq!(r, _m_pminsw(a, b));
}
#[simd_test = "sse"]
unsafe fn test_mm_min_pu8() {
let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8);
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4);
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
assert_eq!(r, u8x8::from(_mm_min_pu8(a.into(), b.into())));
assert_eq!(r, u8x8::from(_m_pminub(a.into(), b.into())));
assert_eq!(r, _mm_min_pu8(a, b));
assert_eq!(r, _m_pminub(a, b));
}
#[simd_test = "sse"]
unsafe fn test_mm_mulhi_pu16() {
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
let r = u16x4::from(_mm_mulhi_pu16(a.into(), b.into()));
assert_eq!(r, u16x4::splat(15));
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _mm_mulhi_pu16(a, b);
assert_eq!(r, _mm_set1_pi16(15));
}
#[simd_test = "sse"]
unsafe fn test_m_pmulhuw() {
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
let r = _m_pmulhuw(a.into(), b.into());
assert_eq!(r, u16x4::splat(15).into());
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _m_pmulhuw(a, b);
assert_eq!(r, _mm_set1_pi16(15));
}
#[simd_test = "sse"]
unsafe fn test_mm_avg_pu8() {
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
let r = u8x8::from(_mm_avg_pu8(a.into(), b.into()));
assert_eq!(r, u8x8::splat(6));
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
let r = _mm_avg_pu8(a, b);
assert_eq!(r, _mm_set1_pi8(6));
let r = u8x8::from(_m_pavgb(a.into(), b.into()));
assert_eq!(r, u8x8::splat(6));
let r = _m_pavgb(a, b);
assert_eq!(r, _mm_set1_pi8(6));
}
#[simd_test = "sse"]
unsafe fn test_mm_avg_pu16() {
let (a, b) = (u16x4::splat(3), u16x4::splat(9));
let r = u16x4::from(_mm_avg_pu16(a.into(), b.into()));
assert_eq!(r, u16x4::splat(6));
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
let r = _mm_avg_pu16(a, b);
assert_eq!(r, _mm_set1_pi16(6));
let r = u16x4::from(_m_pavgw(a.into(), b.into()));
assert_eq!(r, u16x4::splat(6));
let r = _m_pavgw(a, b);
assert_eq!(r, _mm_set1_pi16(6));
}
#[simd_test = "sse"]
unsafe fn test_mm_sad_pu8() {
let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4);
let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1);
let r = _mm_sad_pu8(a.into(), b.into());
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
let a = _mm_setr_pi8(255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4);
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
let r = _mm_sad_pu8(a, b);
assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0)));
let r = _m_psadbw(a.into(), b.into());
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
let r = _m_psadbw(a, b);
assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0)));
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtpi32_ps() {
let a = _mm_setr_ps(0., 0., 3., 4.);
let b = i32x2::new(1, 2);
let b = _mm_setr_pi32(1, 2);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32_ps(a, b);
assert_eq_m128(r, expected);
@ -569,40 +560,40 @@ mod tests {
#[simd_test = "sse"]
unsafe fn test_mm_cvtpi16_ps() {
let a = i16x4::new(1, 2, 3, 4);
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi16_ps(a.into());
let r = _mm_cvtpi16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtpu16_ps() {
let a = u16x4::new(1, 2, 3, 4);
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu16_ps(a.into());
let r = _mm_cvtpu16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtpi8_ps() {
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi8_ps(a.into());
let r = _mm_cvtpi8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtpu8_ps() {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu8_ps(a.into());
let r = _mm_cvtpu8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtpi32x2_ps() {
let a = i32x2::new(1, 2);
let b = i32x2::new(3, 4);
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32x2_ps(a, b);
assert_eq_m128(r, expected);
@ -610,24 +601,25 @@ mod tests {
#[simd_test = "sse"]
unsafe fn test_mm_maskmove_si64() {
let a = i8x8::splat(9);
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
let mut r = i8x8::splat(0);
let a = _mm_set1_pi8(9);
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
let mut r = _mm_set1_pi8(0);
_mm_maskmove_si64(
a.into(),
mask.into(),
a,
mask,
&mut r as *mut _ as *mut i8,
);
assert_eq!(r, i8x8::splat(0).replace(2, 9));
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
assert_eq!(r, e);
let mut r = i8x8::splat(0);
_m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
assert_eq!(r, i8x8::splat(0).replace(2, 9));
let mut r = _mm_set1_pi8(0);
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
assert_eq!(r, e);
}
#[simd_test = "sse"]
unsafe fn test_mm_extract_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_extract_pi16(a, 0);
assert_eq!(r, 1);
let r = _mm_extract_pi16(a, 1);
@ -639,21 +631,21 @@ mod tests {
#[simd_test = "sse"]
unsafe fn test_mm_insert_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b0));
let expected = i16x4::new(0, 2, 3, 4);
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_insert_pi16(a, 0, 0b0);
let expected = _mm_setr_pi16(0, 2, 3, 4);
assert_eq!(r, expected);
let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b10));
let expected = i16x4::new(1, 2, 0, 4);
let r = _mm_insert_pi16(a, 0, 0b10);
let expected = _mm_setr_pi16(1, 2, 0, 4);
assert_eq!(r, expected);
let r = i16x4::from(_m_pinsrw(a.into(), 0, 0b10));
let r = _m_pinsrw(a, 0, 0b10);
assert_eq!(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_movemask_pi8() {
let a = i16x4::new(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001);
@ -663,28 +655,28 @@ mod tests {
#[simd_test = "sse"]
unsafe fn test_mm_shuffle_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let r = i16x4::from(_mm_shuffle_pi16(a.into(), 0b00_01_01_11));
let expected = i16x4::new(4, 2, 2, 1);
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
let expected = _mm_setr_pi16(4, 2, 2, 1);
assert_eq!(r, expected);
let r = i16x4::from(_m_pshufw(a.into(), 0b00_01_01_11));
let r = _m_pshufw(a, 0b00_01_01_11);
assert_eq!(r, expected);
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtps_pi32() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = i32x2::new(1, 2);
let r = _mm_setr_pi32(1, 2);
assert_eq!(r, i32x2::from(_mm_cvtps_pi32(a)));
assert_eq!(r, i32x2::from(_mm_cvt_ps2pi(a)));
assert_eq!(r, _mm_cvtps_pi32(a));
assert_eq!(r, _mm_cvt_ps2pi(a));
}
#[simd_test = "sse"]
unsafe fn test_mm_cvttps_pi32() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = i32x2::new(7, 2);
let r = _mm_setr_pi32(7, 2);
assert_eq!(r, _mm_cvttps_pi32(a));
assert_eq!(r, _mm_cvtt_ps2pi(a));
@ -693,14 +685,14 @@ mod tests {
#[simd_test = "sse"]
unsafe fn test_mm_cvtps_pi16() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = i16x4::new(7, 2, 3, 4);
assert_eq!(r, i16x4::from(_mm_cvtps_pi16(a)));
let r = _mm_setr_pi16(7, 2, 3, 4);
assert_eq!(r, _mm_cvtps_pi16(a));
}
#[simd_test = "sse"]
unsafe fn test_mm_cvtps_pi8() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq!(r, i8x8::from(_mm_cvtps_pi8(a)));
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq!(r, _mm_cvtps_pi8(a));
}
}

View file

@ -1,8 +1,9 @@
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
use core::mem;
use v128::*;
use v64::*;
use simd_llvm::simd_extract;
use x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -22,7 +23,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: u32x2, b: u32x2) -> __m64 {
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
pmuludq(mem::transmute(a), mem::transmute(b))
}
@ -41,8 +42,8 @@ pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
cvtpi2pd(mem::transmute(a))
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
cvtpi2pd(a)
}
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
@ -50,8 +51,8 @@ pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 {
#[inline(always)]
#[target_feature = "+sse2"]
// no particular instruction to test
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
i64x2::new(mem::transmute(e0), mem::transmute(e1))
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(e1), mem::transmute(e0))
}
/// Initializes both values in a 128-bit vector of [2 x i64] with the
@ -59,8 +60,8 @@ pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 {
#[inline(always)]
#[target_feature = "+sse2"]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
i64x2::new(mem::transmute(a), mem::transmute(a))
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(a), mem::transmute(a))
}
/// Constructs a 128-bit integer vector, initialized in reverse order
@ -68,8 +69,8 @@ pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 {
#[inline(always)]
#[target_feature = "+sse2"]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
i64x2::new(mem::transmute(e1), mem::transmute(e0))
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
}
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
@ -78,8 +79,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 {
#[target_feature = "+sse2"]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
mem::transmute(a.extract(0))
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
mem::transmute(simd_extract::<_, i64>(a, 0))
}
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
@ -88,8 +89,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 {
#[target_feature = "+sse2"]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
i64x2::new(mem::transmute(a), 0)
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(0, mem::transmute(a))
}
/// Converts the two double-precision floating-point elements of a
@ -98,8 +99,8 @@ pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
mem::transmute(cvtpd2pi(a))
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
cvtpd2pi(a)
}
/// Converts the two double-precision floating-point elements of a
@ -110,8 +111,8 @@ pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 {
#[inline(always)]
#[target_feature = "+sse2"]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: f64x2) -> i32x2 {
mem::transmute(cvttpd2pi(a))
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
cvttpd2pi(a)
}
#[allow(improper_ctypes)]
@ -123,11 +124,11 @@ extern "C" {
#[link_name = "llvm.x86.mmx.psub.q"]
fn psubq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtpi2pd"]
fn cvtpi2pd(a: __m64) -> f64x2;
fn cvtpi2pd(a: __m64) -> __m128d;
#[link_name = "llvm.x86.sse.cvtpd2pi"]
fn cvtpd2pi(a: f64x2) -> __m64;
fn cvtpd2pi(a: __m128d) -> __m64;
#[link_name = "llvm.x86.sse.cvttpd2pi"]
fn cvttpd2pi(a: f64x2) -> __m64;
fn cvttpd2pi(a: __m128d) -> __m64;
}
#[cfg(test)]
@ -136,74 +137,72 @@ mod tests {
use stdsimd_test::simd_test;
use v128::*;
use v64::*;
use x86::i686::sse2;
use x86::*;
#[simd_test = "sse2"]
unsafe fn _mm_add_si64() {
unsafe fn test_mm_add_si64() {
let a = 1i64;
let b = 2i64;
let expected = 3i64;
let r = sse2::_mm_add_si64(mem::transmute(a), mem::transmute(b));
let r = _mm_add_si64(mem::transmute(a), mem::transmute(b));
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
}
#[simd_test = "sse2"]
unsafe fn _mm_mul_su32() {
let a = u32x2::new(1, 2);
let b = u32x2::new(3, 4);
unsafe fn test_mm_mul_su32() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = 3u64;
let r = sse2::_mm_mul_su32(a, b);
let r = _mm_mul_su32(a, b);
assert_eq!(r, mem::transmute(expected));
}
#[simd_test = "sse2"]
unsafe fn _mm_sub_si64() {
unsafe fn test_mm_sub_si64() {
let a = 1i64;
let b = 2i64;
let expected = -1i64;
let r = sse2::_mm_sub_si64(mem::transmute(a), mem::transmute(b));
let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b));
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
}
#[simd_test = "sse2"]
unsafe fn _mm_cvtpi32_pd() {
let a = i32x2::new(1, 2);
let expected = f64x2::new(1., 2.);
let r = sse2::_mm_cvtpi32_pd(a);
assert_eq!(r, expected);
unsafe fn test_mm_cvtpi32_pd() {
let a = _mm_setr_pi32(1, 2);
let expected = _mm_setr_pd(1., 2.);
let r = _mm_cvtpi32_pd(a);
assert_eq_m128d(r, expected);
}
#[simd_test = "sse2"]
unsafe fn _mm_set_epi64() {
unsafe fn test_mm_set_epi64() {
let r =
sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq!(r, i64x2::new(2, 1));
_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq!(r, _mm_setr_epi64x(2, 1));
}
#[simd_test = "sse2"]
unsafe fn _mm_set1_epi64() {
let r = sse2::_mm_set1_epi64(mem::transmute(1i64));
assert_eq!(r, i64x2::new(1, 1));
unsafe fn test_mm_set1_epi64() {
let r = _mm_set1_epi64(mem::transmute(1i64));
assert_eq!(r, _mm_setr_epi64x(1, 1));
}
#[simd_test = "sse2"]
unsafe fn _mm_setr_epi64() {
unsafe fn test_mm_setr_epi64() {
let r =
sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq!(r, i64x2::new(1, 2));
_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq!(r, _mm_setr_epi64x(1, 2));
}
#[simd_test = "sse2"]
unsafe fn _mm_movepi64_pi64() {
let r = sse2::_mm_movepi64_pi64(i64x2::new(5, 0));
assert_eq!(r, mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0)));
unsafe fn test_mm_movepi64_pi64() {
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
assert_eq!(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
}
#[simd_test = "sse2"]
unsafe fn _mm_movpi64_epi64() {
let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new(
unsafe fn test_mm_movpi64_epi64() {
let r = _mm_movpi64_epi64(_mm_setr_pi8(
5,
0,
0,
@ -212,27 +211,27 @@ mod tests {
0,
0,
0,
)));
assert_eq!(r, i64x2::new(5, 0));
));
assert_eq!(r, _mm_setr_epi64x(5, 0));
}
#[simd_test = "sse2"]
unsafe fn _mm_cvtpd_pi32() {
let a = f64x2::new(5., 0.);
let r = sse2::_mm_cvtpd_pi32(a);
assert_eq!(r, i32x2::new(5, 0));
unsafe fn test_mm_cvtpd_pi32() {
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvtpd_pi32(a);
assert_eq!(r, _mm_setr_pi32(5, 0));
}
#[simd_test = "sse2"]
unsafe fn _mm_cvttpd_pi32() {
unsafe fn test_mm_cvttpd_pi32() {
use std::{f64, i32};
let a = f64x2::new(5., 0.);
let r = sse2::_mm_cvttpd_pi32(a);
assert_eq!(r, i32x2::new(5, 0));
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvttpd_pi32(a);
assert_eq!(r, _mm_setr_pi32(5, 0));
let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
let r = sse2::_mm_cvttpd_pi32(a);
assert_eq!(r, i32x2::new(i32::MIN, i32::MIN));
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
let r = _mm_cvttpd_pi32(a);
assert_eq!(r, _mm_setr_pi32(i32::MIN, i32::MIN));
}
}

View file

@ -9,11 +9,11 @@ use stdsimd_test::assert_instr;
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse41.ptestz"]
fn ptestz(a: i64x2, mask: i64x2) -> i32;
fn ptestz(a: __m128i, mask: __m128i) -> i32;
#[link_name = "llvm.x86.sse41.ptestc"]
fn ptestc(a: i64x2, mask: i64x2) -> i32;
fn ptestc(a: __m128i, mask: __m128i) -> i32;
#[link_name = "llvm.x86.sse41.ptestnzc"]
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
fn ptestnzc(a: __m128i, mask: __m128i) -> i32;
}
/// Tests whether the specified bits in a 128-bit integer vector are all
@ -33,7 +33,7 @@ extern "C" {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
ptestz(i64x2::from(a), i64x2::from(mask))
ptestz(a, mask)
}
/// Tests whether the specified bits in a 128-bit integer vector are all
@ -53,7 +53,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestc(i64x2::from(a), i64x2::from(mask))
ptestc(a, mask)
}
/// Tests whether the specified bits in a 128-bit integer vector are
@ -73,7 +73,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestnzc(i64x2::from(a), i64x2::from(mask))
ptestnzc(a, mask)
}
/// Tests whether the specified bits in a 128-bit integer vector are all
@ -138,104 +138,103 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use x86::i686::sse41;
use v128::*;
use x86::*;
#[simd_test = "sse4.1"]
unsafe fn _mm_testz_si128() {
let a = i8x16::splat(1);
let mask = i8x16::splat(0);
let r = sse41::_mm_testz_si128(a.into(), mask.into());
unsafe fn test_mm_testz_si128() {
let a = _mm_set1_epi8(1);
let mask = _mm_set1_epi8(0);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b110);
let r = sse41::_mm_testz_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(0b011);
let mask = i8x16::splat(0b100);
let r = sse41::_mm_testz_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b011);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_testc_si128() {
let a = i8x16::splat(-1);
let mask = i8x16::splat(0);
let r = sse41::_mm_testc_si128(a.into(), mask.into());
unsafe fn test_mm_testc_si128() {
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b110);
let r = sse41::_mm_testc_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b100);
let r = sse41::_mm_testc_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_testnzc_si128() {
let a = i8x16::splat(0);
let mask = i8x16::splat(1);
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
unsafe fn test_mm_testnzc_si128() {
let a = _mm_set1_epi8(0);
let mask = _mm_set1_epi8(1);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(-1);
let mask = i8x16::splat(0);
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b110);
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b101);
let r = sse41::_mm_testnzc_si128(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b101);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_test_all_zeros() {
let a = i8x16::splat(1);
let mask = i8x16::splat(0);
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
unsafe fn test_mm_test_all_zeros() {
let a = _mm_set1_epi8(1);
let mask = _mm_set1_epi8(0);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b110);
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(0b011);
let mask = i8x16::splat(0b100);
let r = sse41::_mm_test_all_zeros(a.into(), mask.into());
let a = _mm_set1_epi8(0b011);
let mask = _mm_set1_epi8(0b100);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_test_all_ones() {
let a = i8x16::splat(-1);
let r = sse41::_mm_test_all_ones(a.into());
unsafe fn test_mm_test_all_ones() {
let a = _mm_set1_epi8(-1);
let r = _mm_test_all_ones(a);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let r = sse41::_mm_test_all_ones(a.into());
let a = _mm_set1_epi8(0b101);
let r = _mm_test_all_ones(a);
assert_eq!(r, 0);
}
#[simd_test = "sse4.1"]
unsafe fn _mm_test_mix_ones_zeros() {
let a = i8x16::splat(0);
let mask = i8x16::splat(1);
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
unsafe fn test_mm_test_mix_ones_zeros() {
let a = _mm_set1_epi8(0);
let mask = _mm_set1_epi8(1);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(-1);
let mask = i8x16::splat(0);
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b110);
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 1);
let a = i8x16::splat(0b101);
let mask = i8x16::splat(0b101);
let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into());
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b101);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
}
}

View file

@ -1,6 +1,8 @@
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
use simd_llvm::*;
use v128::*;
use x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
@ -10,22 +12,21 @@ use stdsimd_test::assert_instr;
#[inline(always)]
#[target_feature = "+sse4.2"]
#[cfg_attr(test, assert_instr(pcmpgtq))]
pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 {
a.gt(b)
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
#[cfg(test)]
mod tests {
use v128::*;
use x86::i686::sse42;
use x86::*;
use stdsimd_test::simd_test;
#[simd_test = "sse4.2"]
unsafe fn _mm_cmpgt_epi64() {
let a = i64x2::splat(0x00).replace(1, 0x2a);
let b = i64x2::splat(0x00);
let i = sse42::_mm_cmpgt_epi64(a, b);
assert_eq!(i, i64x2::new(0x00, 0xffffffffffffffffu64 as i64));
unsafe fn test_mm_cmpgt_epi64() {
let a = _mm_setr_epi64x(0, 0x2a);
let b = _mm_set1_epi64x(0x00);
let i = _mm_cmpgt_epi64(a, b);
assert_eq!(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64));
}
}