diff --git a/library/stdarch/coresimd/src/x86/i686/mmx.rs b/library/stdarch/coresimd/src/x86/i686/mmx.rs index acf43dc2b9a1..07f1008a9cc5 100644 --- a/library/stdarch/coresimd/src/x86/i686/mmx.rs +++ b/library/stdarch/coresimd/src/x86/i686/mmx.rs @@ -185,6 +185,69 @@ pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 { punpckldq(a, b) } +/// Set packed 16-bit integers in dst with the supplied values. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 { + _mm_setr_pi16(e0, e1, e2, e3) +} + +/// Set packed 32-bit integers in dst with the supplied values. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 { + _mm_setr_pi32(e0, e1) +} + +/// Set packed 8-bit integers in dst with the supplied values. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 { + _mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7) +} + +/// Broadcast 16-bit integer a to all elements of dst. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 { + _mm_setr_pi16(a, a, a, a) +} + +/// Broadcast 32-bit integer a to all elements of dst. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 { + _mm_setr_pi32(a, a) +} + +/// Broadcast 8-bit integer a to all elements of dst. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 { + _mm_setr_pi8(a, a, a, a, a, a, a, a) +} + +/// Set packed 16-bit integers in dst with the supplied values in reverse order. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 { + mem::transmute(i16x4::new(e0, e1, e2, e3)) +} + +/// Set packed 32-bit integers in dst with the supplied values in reverse order. 
+#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 { + mem::transmute(i32x2::new(e0, e1)) +} + +/// Set packed 8-bit integers in dst with the supplied values in reverse order. +#[inline(always)] +#[target_feature = "+mmx"] +pub unsafe fn _mm_setr_pi8(e0: i8, e1: i8, e2: i8, e3: i8, e4: i8, e5: i8, e6: i8, e7: i8) -> __m64 { + mem::transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.mmx.padd.b"] diff --git a/library/stdarch/coresimd/src/x86/i686/sse.rs b/library/stdarch/coresimd/src/x86/i686/sse.rs index d3f1224c3a6c..20a1e4d236d9 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse.rs @@ -1,6 +1,5 @@ //! `i686` Streaming SIMD Extensions (SSE) -use v64::*; use core::mem; use x86::*; @@ -204,7 +203,7 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 { +pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 { cvtpi2ps(a, mem::transmute(b)) } @@ -215,7 +214,7 @@ pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: i32x2) -> __m128 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: i32x2) -> __m128 { +pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 { _mm_cvtpi32_ps(a, b) } @@ -274,7 +273,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtpi2ps))] -pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> __m128 { +pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 { let c = i586::_mm_setzero_ps(); let c = _mm_cvtpi32_ps(c, b); let c = i586::_mm_movelh_ps(c, c); @@ -314,7 +313,7 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { #[inline(always)] 
#[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 { +pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 { macro_rules! call { ($imm2:expr) => { pextrw(mem::transmute(a), $imm2) as i16 } } @@ -326,7 +325,7 @@ pub unsafe fn _mm_extract_pi16(a: i16x4, imm2: i32) -> i16 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pextrw, imm2 = 0))] -pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 { +pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 { _mm_extract_pi16(a, imm2) } @@ -359,7 +358,7 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 { +pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 { pmovmskb(mem::transmute(a)) } @@ -369,7 +368,7 @@ pub unsafe fn _mm_movemask_pi8(a: i16x4) -> i32 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmovmskb))] -pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 { +pub unsafe fn _m_pmovmskb(a: __m64) -> i32 { _mm_movemask_pi8(a) } @@ -399,7 +398,7 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 { +pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 { mem::transmute(cvttps2pi(a)) } @@ -408,7 +407,7 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> i32x2 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvttps2pi))] -pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> i32x2 { +pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 { _mm_cvttps_pi32(a) } @@ -458,107 +457,99 @@ pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 { mod tests { use std::mem; - use v64::{i16x4, i32x2, i8x8, u16x4, u8x8}; use x86::*; use stdsimd_test::simd_test; - #[target_feature = "+avx"] - unsafe fn 
assert_eq_m128(a: __m128, b: __m128) { - let r = _mm_cmpeq_ps(a, b); - if _mm_movemask_ps(r) != 0b1111 { - panic!("{:?} != {:?}", a, b); - } - } - #[simd_test = "sse"] unsafe fn test_mm_max_pi16() { - let a = i16x4::new(-1, 6, -3, 8); - let b = i16x4::new(5, -2, 7, -4); - let r = i16x4::new(5, 6, 7, 8); + let a = _mm_setr_pi16(-1, 6, -3, 8); + let b = _mm_setr_pi16(5, -2, 7, -4); + let r = _mm_setr_pi16(5, 6, 7, 8); - assert_eq!(r, i16x4::from(_mm_max_pi16(a.into(), b.into()))); - assert_eq!(r, i16x4::from(_m_pmaxsw(a.into(), b.into()))); + assert_eq!(r, _mm_max_pi16(a, b)); + assert_eq!(r, _m_pmaxsw(a, b)); } #[simd_test = "sse"] unsafe fn test_mm_max_pu8() { - let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8); - let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4); - let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8); + let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); + let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); + let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8); - assert_eq!(r, u8x8::from(_mm_max_pu8(a.into(), b.into()))); - assert_eq!(r, u8x8::from(_m_pmaxub(a.into(), b.into()))); + assert_eq!(r, _mm_max_pu8(a, b)); + assert_eq!(r, _m_pmaxub(a, b)); } #[simd_test = "sse"] unsafe fn test_mm_min_pi16() { - let a = i16x4::new(-1, 6, -3, 8); - let b = i16x4::new(5, -2, 7, -4); - let r = i16x4::new(-1, -2, -3, -4); + let a = _mm_setr_pi16(-1, 6, -3, 8); + let b = _mm_setr_pi16(5, -2, 7, -4); + let r = _mm_setr_pi16(-1, -2, -3, -4); - assert_eq!(r, i16x4::from(_mm_min_pi16(a.into(), b.into()))); - assert_eq!(r, i16x4::from(_m_pminsw(a.into(), b.into()))); + assert_eq!(r, _mm_min_pi16(a, b)); + assert_eq!(r, _m_pminsw(a, b)); } #[simd_test = "sse"] unsafe fn test_mm_min_pu8() { - let a = u8x8::new(2, 6, 3, 8, 2, 6, 3, 8); - let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4); - let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4); + let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8); + let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4); + let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4); - assert_eq!(r, u8x8::from(_mm_min_pu8(a.into(), 
b.into()))); - assert_eq!(r, u8x8::from(_m_pminub(a.into(), b.into()))); + assert_eq!(r, _mm_min_pu8(a, b)); + assert_eq!(r, _m_pminub(a, b)); } #[simd_test = "sse"] unsafe fn test_mm_mulhi_pu16() { - let (a, b) = (u16x4::splat(1000), u16x4::splat(1001)); - let r = u16x4::from(_mm_mulhi_pu16(a.into(), b.into())); - assert_eq!(r, u16x4::splat(15)); + let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); + let r = _mm_mulhi_pu16(a, b); + assert_eq!(r, _mm_set1_pi16(15)); } #[simd_test = "sse"] unsafe fn test_m_pmulhuw() { - let (a, b) = (u16x4::splat(1000), u16x4::splat(1001)); - let r = _m_pmulhuw(a.into(), b.into()); - assert_eq!(r, u16x4::splat(15).into()); + let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001)); + let r = _m_pmulhuw(a, b); + assert_eq!(r, _mm_set1_pi16(15)); } #[simd_test = "sse"] unsafe fn test_mm_avg_pu8() { - let (a, b) = (u8x8::splat(3), u8x8::splat(9)); - let r = u8x8::from(_mm_avg_pu8(a.into(), b.into())); - assert_eq!(r, u8x8::splat(6)); + let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9)); + let r = _mm_avg_pu8(a, b); + assert_eq!(r, _mm_set1_pi8(6)); - let r = u8x8::from(_m_pavgb(a.into(), b.into())); - assert_eq!(r, u8x8::splat(6)); + let r = _m_pavgb(a, b); + assert_eq!(r, _mm_set1_pi8(6)); } #[simd_test = "sse"] unsafe fn test_mm_avg_pu16() { - let (a, b) = (u16x4::splat(3), u16x4::splat(9)); - let r = u16x4::from(_mm_avg_pu16(a.into(), b.into())); - assert_eq!(r, u16x4::splat(6)); + let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9)); + let r = _mm_avg_pu16(a, b); + assert_eq!(r, _mm_set1_pi16(6)); - let r = u16x4::from(_m_pavgw(a.into(), b.into())); - assert_eq!(r, u16x4::splat(6)); + let r = _m_pavgw(a, b); + assert_eq!(r, _mm_set1_pi16(6)); } #[simd_test = "sse"] unsafe fn test_mm_sad_pu8() { - let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4); - let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1); - let r = _mm_sad_pu8(a.into(), b.into()); - assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0))); + let a = _mm_setr_pi8(255u8 as i8, 254u8 
as i8, 253u8 as i8, 252u8 as i8, + 1, 2, 3, 4); + let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1); + let r = _mm_sad_pu8(a, b); + assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0))); - let r = _m_psadbw(a.into(), b.into()); - assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0))); + let r = _m_psadbw(a, b); + assert_eq!(r, mem::transmute(_mm_setr_pi16(1020, 0, 0, 0))); } #[simd_test = "sse"] unsafe fn test_mm_cvtpi32_ps() { let a = _mm_setr_ps(0., 0., 3., 4.); - let b = i32x2::new(1, 2); + let b = _mm_setr_pi32(1, 2); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = _mm_cvtpi32_ps(a, b); assert_eq_m128(r, expected); @@ -569,40 +560,40 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_cvtpi16_ps() { - let a = i16x4::new(1, 2, 3, 4); + let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi16_ps(a.into()); + let r = _mm_cvtpi16_ps(a); assert_eq_m128(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_cvtpu16_ps() { - let a = u16x4::new(1, 2, 3, 4); + let a = _mm_setr_pi16(1, 2, 3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu16_ps(a.into()); + let r = _mm_cvtpu16_ps(a); assert_eq_m128(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_cvtpi8_ps() { - let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpi8_ps(a.into()); + let r = _mm_cvtpi8_ps(a); assert_eq_m128(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_cvtpu8_ps() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8); let expected = _mm_setr_ps(1., 2., 3., 4.); - let r = _mm_cvtpu8_ps(a.into()); + let r = _mm_cvtpu8_ps(a); assert_eq_m128(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_cvtpi32x2_ps() { - let a = i32x2::new(1, 2); - let b = i32x2::new(3, 4); + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(3, 4); let expected = _mm_setr_ps(1., 2., 3., 4.); let r = 
_mm_cvtpi32x2_ps(a, b); assert_eq_m128(r, expected); @@ -610,24 +601,25 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_maskmove_si64() { - let a = i8x8::splat(9); - let mask = i8x8::splat(0).replace(2, 0x80u8 as i8); - let mut r = i8x8::splat(0); + let a = _mm_set1_pi8(9); + let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0); + let mut r = _mm_set1_pi8(0); _mm_maskmove_si64( - a.into(), - mask.into(), + a, + mask, &mut r as *mut _ as *mut i8, ); - assert_eq!(r, i8x8::splat(0).replace(2, 9)); + let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0); + assert_eq!(r, e); - let mut r = i8x8::splat(0); - _m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8); - assert_eq!(r, i8x8::splat(0).replace(2, 9)); + let mut r = _mm_set1_pi8(0); + _m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); + assert_eq!(r, e); } #[simd_test = "sse"] unsafe fn test_mm_extract_pi16() { - let a = i16x4::new(1, 2, 3, 4); + let a = _mm_setr_pi16(1, 2, 3, 4); let r = _mm_extract_pi16(a, 0); assert_eq!(r, 1); let r = _mm_extract_pi16(a, 1); @@ -639,21 +631,21 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_insert_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b0)); - let expected = i16x4::new(0, 2, 3, 4); + let a = _mm_setr_pi16(1, 2, 3, 4); + let r = _mm_insert_pi16(a, 0, 0b0); + let expected = _mm_setr_pi16(0, 2, 3, 4); assert_eq!(r, expected); - let r = i16x4::from(_mm_insert_pi16(a.into(), 0, 0b10)); - let expected = i16x4::new(1, 2, 0, 4); + let r = _mm_insert_pi16(a, 0, 0b10); + let expected = _mm_setr_pi16(1, 2, 0, 4); assert_eq!(r, expected); - let r = i16x4::from(_m_pinsrw(a.into(), 0, 0b10)); + let r = _m_pinsrw(a, 0, 0b10); assert_eq!(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_movemask_pi8() { - let a = i16x4::new(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); + let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000); let r = _mm_movemask_pi8(a); assert_eq!(r, 0b10001); @@ -663,28 
+655,28 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_shuffle_pi16() { - let a = i16x4::new(1, 2, 3, 4); - let r = i16x4::from(_mm_shuffle_pi16(a.into(), 0b00_01_01_11)); - let expected = i16x4::new(4, 2, 2, 1); + let a = _mm_setr_pi16(1, 2, 3, 4); + let r = _mm_shuffle_pi16(a, 0b00_01_01_11); + let expected = _mm_setr_pi16(4, 2, 2, 1); assert_eq!(r, expected); - let r = i16x4::from(_m_pshufw(a.into(), 0b00_01_01_11)); + let r = _m_pshufw(a, 0b00_01_01_11); assert_eq!(r, expected); } #[simd_test = "sse"] unsafe fn test_mm_cvtps_pi32() { let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); - let r = i32x2::new(1, 2); + let r = _mm_setr_pi32(1, 2); - assert_eq!(r, i32x2::from(_mm_cvtps_pi32(a))); - assert_eq!(r, i32x2::from(_mm_cvt_ps2pi(a))); + assert_eq!(r, _mm_cvtps_pi32(a)); + assert_eq!(r, _mm_cvt_ps2pi(a)); } #[simd_test = "sse"] unsafe fn test_mm_cvttps_pi32() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = i32x2::new(7, 2); + let r = _mm_setr_pi32(7, 2); assert_eq!(r, _mm_cvttps_pi32(a)); assert_eq!(r, _mm_cvtt_ps2pi(a)); @@ -693,14 +685,14 @@ mod tests { #[simd_test = "sse"] unsafe fn test_mm_cvtps_pi16() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = i16x4::new(7, 2, 3, 4); - assert_eq!(r, i16x4::from(_mm_cvtps_pi16(a))); + let r = _mm_setr_pi16(7, 2, 3, 4); + assert_eq!(r, _mm_cvtps_pi16(a)); } #[simd_test = "sse"] unsafe fn test_mm_cvtps_pi8() { let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0); - let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0); - assert_eq!(r, i8x8::from(_mm_cvtps_pi8(a))); + let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0); + assert_eq!(r, _mm_cvtps_pi8(a)); } } diff --git a/library/stdarch/coresimd/src/x86/i686/sse2.rs b/library/stdarch/coresimd/src/x86/i686/sse2.rs index c9b5fd3fa071..73f4a12f24b5 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse2.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse2.rs @@ -1,8 +1,9 @@ //! 
`i686`'s Streaming SIMD Extensions 2 (SSE2) use core::mem; -use v128::*; -use v64::*; + +use simd_llvm::simd_extract; +use x86::*; #[cfg(test)] use stdsimd_test::assert_instr; @@ -22,7 +23,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(pmuludq))] -pub unsafe fn _mm_mul_su32(a: u32x2, b: u32x2) -> __m64 { +pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 { pmuludq(mem::transmute(a), mem::transmute(b)) } @@ -41,8 +42,8 @@ pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtpi2pd))] -pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 { - cvtpi2pd(mem::transmute(a)) +pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d { + cvtpi2pd(a) } /// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with @@ -50,8 +51,8 @@ pub unsafe fn _mm_cvtpi32_pd(a: i32x2) -> f64x2 { #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 { - i64x2::new(mem::transmute(e0), mem::transmute(e1)) +pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0)) } /// Initializes both values in a 128-bit vector of [2 x i64] with the @@ -59,8 +60,8 @@ pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 { - i64x2::new(mem::transmute(a), mem::transmute(a)) +pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(a), mem::transmute(a)) } /// Constructs a 128-bit integer vector, initialized in reverse order @@ -68,8 +69,8 @@ pub unsafe fn _mm_set1_epi64(a: __m64) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] // no particular instruction to test -pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) 
-> i64x2 { - i64x2::new(mem::transmute(e1), mem::transmute(e0)) +pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i { + _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1)) } /// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit @@ -78,8 +79,8 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> i64x2 { #[target_feature = "+sse2"] // #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong // instr? -pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 { - mem::transmute(a.extract(0)) +pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 { + mem::transmute(simd_extract::<_, i64>(a, 0)) } /// Moves the 64-bit operand to a 128-bit integer vector, zeroing the @@ -88,8 +89,8 @@ pub unsafe fn _mm_movepi64_pi64(a: i64x2) -> __m64 { #[target_feature = "+sse2"] // #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong // instr? -pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 { - i64x2::new(mem::transmute(a), 0) +pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i { + _mm_set_epi64x(0, mem::transmute(a)) } /// Converts the two double-precision floating-point elements of a @@ -98,8 +99,8 @@ pub unsafe fn _mm_movpi64_epi64(a: __m64) -> i64x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvtpd2pi))] -pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 { - mem::transmute(cvtpd2pi(a)) +pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 { + cvtpd2pi(a) } /// Converts the two double-precision floating-point elements of a @@ -110,8 +111,8 @@ pub unsafe fn _mm_cvtpd_pi32(a: f64x2) -> i32x2 { #[inline(always)] #[target_feature = "+sse2"] #[cfg_attr(test, assert_instr(cvttpd2pi))] -pub unsafe fn _mm_cvttpd_pi32(a: f64x2) -> i32x2 { - mem::transmute(cvttpd2pi(a)) +pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 { + cvttpd2pi(a) } #[allow(improper_ctypes)] @@ -123,11 +124,11 @@ extern "C" { #[link_name = "llvm.x86.mmx.psub.q"] fn psubq(a: __m64, b: __m64) -> __m64; #[link_name = 
"llvm.x86.sse.cvtpi2pd"] - fn cvtpi2pd(a: __m64) -> f64x2; + fn cvtpi2pd(a: __m64) -> __m128d; #[link_name = "llvm.x86.sse.cvtpd2pi"] - fn cvtpd2pi(a: f64x2) -> __m64; + fn cvtpd2pi(a: __m128d) -> __m64; #[link_name = "llvm.x86.sse.cvttpd2pi"] - fn cvttpd2pi(a: f64x2) -> __m64; + fn cvttpd2pi(a: __m128d) -> __m64; } #[cfg(test)] @@ -136,74 +137,72 @@ mod tests { use stdsimd_test::simd_test; - use v128::*; - use v64::*; - use x86::i686::sse2; + use x86::*; #[simd_test = "sse2"] - unsafe fn _mm_add_si64() { + unsafe fn test_mm_add_si64() { let a = 1i64; let b = 2i64; let expected = 3i64; - let r = sse2::_mm_add_si64(mem::transmute(a), mem::transmute(b)); + let r = _mm_add_si64(mem::transmute(a), mem::transmute(b)); assert_eq!(mem::transmute::<__m64, i64>(r), expected); } #[simd_test = "sse2"] - unsafe fn _mm_mul_su32() { - let a = u32x2::new(1, 2); - let b = u32x2::new(3, 4); + unsafe fn test_mm_mul_su32() { + let a = _mm_setr_pi32(1, 2); + let b = _mm_setr_pi32(3, 4); let expected = 3u64; - let r = sse2::_mm_mul_su32(a, b); + let r = _mm_mul_su32(a, b); assert_eq!(r, mem::transmute(expected)); } #[simd_test = "sse2"] - unsafe fn _mm_sub_si64() { + unsafe fn test_mm_sub_si64() { let a = 1i64; let b = 2i64; let expected = -1i64; - let r = sse2::_mm_sub_si64(mem::transmute(a), mem::transmute(b)); + let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b)); assert_eq!(mem::transmute::<__m64, i64>(r), expected); } #[simd_test = "sse2"] - unsafe fn _mm_cvtpi32_pd() { - let a = i32x2::new(1, 2); - let expected = f64x2::new(1., 2.); - let r = sse2::_mm_cvtpi32_pd(a); - assert_eq!(r, expected); + unsafe fn test_mm_cvtpi32_pd() { + let a = _mm_setr_pi32(1, 2); + let expected = _mm_setr_pd(1., 2.); + let r = _mm_cvtpi32_pd(a); + assert_eq_m128d(r, expected); } #[simd_test = "sse2"] - unsafe fn _mm_set_epi64() { + unsafe fn test_mm_set_epi64() { let r = - sse2::_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); - assert_eq!(r, i64x2::new(2, 1)); + 
_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64)); + assert_eq!(r, _mm_setr_epi64x(2, 1)); } #[simd_test = "sse2"] - unsafe fn _mm_set1_epi64() { - let r = sse2::_mm_set1_epi64(mem::transmute(1i64)); - assert_eq!(r, i64x2::new(1, 1)); + unsafe fn test_mm_set1_epi64() { + let r = _mm_set1_epi64(mem::transmute(1i64)); + assert_eq!(r, _mm_setr_epi64x(1, 1)); } #[simd_test = "sse2"] - unsafe fn _mm_setr_epi64() { + unsafe fn test_mm_setr_epi64() { let r = - sse2::_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); - assert_eq!(r, i64x2::new(1, 2)); + _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64)); + assert_eq!(r, _mm_setr_epi64x(1, 2)); } #[simd_test = "sse2"] - unsafe fn _mm_movepi64_pi64() { - let r = sse2::_mm_movepi64_pi64(i64x2::new(5, 0)); - assert_eq!(r, mem::transmute(i8x8::new(5, 0, 0, 0, 0, 0, 0, 0))); + unsafe fn test_mm_movepi64_pi64() { + let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0)); + assert_eq!(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_movpi64_epi64() { - let r = sse2::_mm_movpi64_epi64(mem::transmute(i8x8::new( + unsafe fn test_mm_movpi64_epi64() { + let r = _mm_movpi64_epi64(_mm_setr_pi8( 5, 0, 0, @@ -212,27 +211,27 @@ mod tests { 0, 0, 0, - ))); - assert_eq!(r, i64x2::new(5, 0)); + )); + assert_eq!(r, _mm_setr_epi64x(5, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_cvtpd_pi32() { - let a = f64x2::new(5., 0.); - let r = sse2::_mm_cvtpd_pi32(a); - assert_eq!(r, i32x2::new(5, 0)); + unsafe fn test_mm_cvtpd_pi32() { + let a = _mm_setr_pd(5., 0.); + let r = _mm_cvtpd_pi32(a); + assert_eq!(r, _mm_setr_pi32(5, 0)); } #[simd_test = "sse2"] - unsafe fn _mm_cvttpd_pi32() { + unsafe fn test_mm_cvttpd_pi32() { use std::{f64, i32}; - let a = f64x2::new(5., 0.); - let r = sse2::_mm_cvttpd_pi32(a); - assert_eq!(r, i32x2::new(5, 0)); + let a = _mm_setr_pd(5., 0.); + let r = _mm_cvttpd_pi32(a); + assert_eq!(r, _mm_setr_pi32(5, 0)); - let a = f64x2::new(f64::NEG_INFINITY, f64::NAN); - let r = 
sse2::_mm_cvttpd_pi32(a); - assert_eq!(r, i32x2::new(i32::MIN, i32::MIN)); + let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); + let r = _mm_cvttpd_pi32(a); + assert_eq!(r, _mm_setr_pi32(i32::MIN, i32::MIN)); } } diff --git a/library/stdarch/coresimd/src/x86/i686/sse41.rs b/library/stdarch/coresimd/src/x86/i686/sse41.rs index d088c3042366..8bc84c05281e 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse41.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse41.rs @@ -9,11 +9,11 @@ use stdsimd_test::assert_instr; #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.sse41.ptestz"] - fn ptestz(a: i64x2, mask: i64x2) -> i32; + fn ptestz(a: __m128i, mask: __m128i) -> i32; #[link_name = "llvm.x86.sse41.ptestc"] - fn ptestc(a: i64x2, mask: i64x2) -> i32; + fn ptestc(a: __m128i, mask: __m128i) -> i32; #[link_name = "llvm.x86.sse41.ptestnzc"] - fn ptestnzc(a: i64x2, mask: i64x2) -> i32; + fn ptestnzc(a: __m128i, mask: __m128i) -> i32; } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -33,7 +33,7 @@ extern "C" { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { - ptestz(i64x2::from(a), i64x2::from(mask)) + ptestz(a, mask) } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -53,7 +53,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestc(i64x2::from(a), i64x2::from(mask)) + ptestc(a, mask) } /// Tests whether the specified bits in a 128-bit integer vector are @@ -73,7 +73,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestnzc(i64x2::from(a), i64x2::from(mask)) + ptestnzc(a, mask) } /// Tests whether the 
specified bits in a 128-bit integer vector are all @@ -138,104 +138,103 @@ pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 { #[cfg(test)] mod tests { use stdsimd_test::simd_test; - use x86::i686::sse41; - use v128::*; + use x86::*; #[simd_test = "sse4.1"] - unsafe fn _mm_testz_si128() { - let a = i8x16::splat(1); - let mask = i8x16::splat(0); - let r = sse41::_mm_testz_si128(a.into(), mask.into()); + unsafe fn test_mm_testz_si128() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_testz_si128(a, mask); assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b110); - let r = sse41::_mm_testz_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testz_si128(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(0b011); - let mask = i8x16::splat(0b100); - let r = sse41::_mm_testz_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testz_si128(a, mask); assert_eq!(r, 1); } #[simd_test = "sse4.1"] - unsafe fn _mm_testc_si128() { - let a = i8x16::splat(-1); - let mask = i8x16::splat(0); - let r = sse41::_mm_testc_si128(a.into(), mask.into()); + unsafe fn test_mm_testc_si128() { + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testc_si128(a, mask); assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b110); - let r = sse41::_mm_testc_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testc_si128(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b100); - let r = sse41::_mm_testc_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b100); + let r = _mm_testc_si128(a, mask); assert_eq!(r, 1); } #[simd_test = "sse4.1"] - unsafe fn _mm_testnzc_si128() { - let a = i8x16::splat(0); - let mask = i8x16::splat(1); - let r = 
sse41::_mm_testnzc_si128(a.into(), mask.into()); + unsafe fn test_mm_testnzc_si128() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(-1); - let mask = i8x16::splat(0); - let r = sse41::_mm_testnzc_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b110); - let r = sse41::_mm_testnzc_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b101); - let r = sse41::_mm_testnzc_si128(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_testnzc_si128(a, mask); assert_eq!(r, 0); } #[simd_test = "sse4.1"] - unsafe fn _mm_test_all_zeros() { - let a = i8x16::splat(1); - let mask = i8x16::splat(0); - let r = sse41::_mm_test_all_zeros(a.into(), mask.into()); + unsafe fn test_mm_test_all_zeros() { + let a = _mm_set1_epi8(1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b110); - let r = sse41::_mm_test_all_zeros(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(0b011); - let mask = i8x16::splat(0b100); - let r = sse41::_mm_test_all_zeros(a.into(), mask.into()); + let a = _mm_set1_epi8(0b011); + let mask = _mm_set1_epi8(0b100); + let r = _mm_test_all_zeros(a, mask); assert_eq!(r, 1); } #[simd_test = "sse4.1"] - unsafe fn _mm_test_all_ones() { - let a = i8x16::splat(-1); - let r = sse41::_mm_test_all_ones(a.into()); + unsafe fn test_mm_test_all_ones() { + let a = _mm_set1_epi8(-1); + let r = _mm_test_all_ones(a); 
assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let r = sse41::_mm_test_all_ones(a.into()); + let a = _mm_set1_epi8(0b101); + let r = _mm_test_all_ones(a); assert_eq!(r, 0); } #[simd_test = "sse4.1"] - unsafe fn _mm_test_mix_ones_zeros() { - let a = i8x16::splat(0); - let mask = i8x16::splat(1); - let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into()); + unsafe fn test_mm_test_mix_ones_zeros() { + let a = _mm_set1_epi8(0); + let mask = _mm_set1_epi8(1); + let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(-1); - let mask = i8x16::splat(0); - let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into()); + let a = _mm_set1_epi8(-1); + let mask = _mm_set1_epi8(0); + let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b110); - let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b110); + let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 1); - let a = i8x16::splat(0b101); - let mask = i8x16::splat(0b101); - let r = sse41::_mm_test_mix_ones_zeros(a.into(), mask.into()); + let a = _mm_set1_epi8(0b101); + let mask = _mm_set1_epi8(0b101); + let r = _mm_test_mix_ones_zeros(a, mask); assert_eq!(r, 0); } } diff --git a/library/stdarch/coresimd/src/x86/i686/sse42.rs b/library/stdarch/coresimd/src/x86/i686/sse42.rs index a7fe082d699d..f831177d9650 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse42.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse42.rs @@ -1,6 +1,8 @@ //! 
`i686`'s Streaming SIMD Extensions 4.2 (SSE4.2) +use simd_llvm::*; use v128::*; +use x86::*; #[cfg(test)] use stdsimd_test::assert_instr; @@ -10,22 +12,21 @@ use stdsimd_test::assert_instr; #[inline(always)] #[target_feature = "+sse4.2"] #[cfg_attr(test, assert_instr(pcmpgtq))] -pub unsafe fn _mm_cmpgt_epi64(a: i64x2, b: i64x2) -> i64x2 { - a.gt(b) +pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { + mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } #[cfg(test)] mod tests { - use v128::*; - use x86::i686::sse42; + use x86::*; use stdsimd_test::simd_test; #[simd_test = "sse4.2"] - unsafe fn _mm_cmpgt_epi64() { - let a = i64x2::splat(0x00).replace(1, 0x2a); - let b = i64x2::splat(0x00); - let i = sse42::_mm_cmpgt_epi64(a, b); - assert_eq!(i, i64x2::new(0x00, 0xffffffffffffffffu64 as i64)); + unsafe fn test_mm_cmpgt_epi64() { + let a = _mm_setr_epi64x(0, 0x2a); + let b = _mm_set1_epi64x(0x00); + let i = _mm_cmpgt_epi64(a, b); + assert_eq!(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64)); } }