diff --git a/library/stdarch/coresimd/src/x86/i686/mmx.rs b/library/stdarch/coresimd/src/x86/i686/mmx.rs index e6a06f0a1fb9..08f9f46f9b10 100644 --- a/library/stdarch/coresimd/src/x86/i686/mmx.rs +++ b/library/stdarch/coresimd/src/x86/i686/mmx.rs @@ -32,8 +32,8 @@ pub unsafe fn _mm_setzero_si64() -> __m64 { #[inline(always)] #[target_feature = "+mmx,+sse"] #[cfg_attr(test, assert_instr(packsswb))] -pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 { - mem::transmute(packsswb(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 { + packsswb(a, b) } /// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -44,8 +44,8 @@ pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 { #[inline(always)] #[target_feature = "+mmx,+sse"] #[cfg_attr(test, assert_instr(packssdw))] -pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 { - mem::transmute(packssdw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 { + packssdw(a, b) } /// Compares the 8-bit integer elements of two 64-bit integer vectors of @@ -68,8 +68,8 @@ pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(pcmpgtw))] -pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(pcmpgtw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 { + pcmpgtw(a, b) } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -77,8 +77,8 @@ pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected -pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(punpckhwd(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 { + punpckhwd(a, b) } /// Unpacks the lower 32 
bits from two 64-bit integer vectors of [8 x i8] @@ -95,8 +95,8 @@ pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpcklwd))] -pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(punpcklwd(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 { + punpcklwd(a, b) } #[allow(improper_ctypes)] @@ -134,7 +134,7 @@ mod tests { let a = i16x4::new(-1, 2, -3, 4); let b = i16x4::new(-5, 6, -7, 8); let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8); - assert_eq!(r, mmx::_mm_packs_pi16(a, b)); + assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into()))); } #[simd_test = "sse"] // FIXME: should be mmx @@ -142,7 +142,7 @@ mod tests { let a = i32x2::new(-1, 2); let b = i32x2::new(-5, 6); let r = i16x4::new(-1, 2, -5, 6); - assert_eq!(r, mmx::_mm_packs_pi32(a, b)); + assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into()))); } #[simd_test = "mmx"] @@ -158,7 +158,7 @@ mod tests { let a = i16x4::new(0, 1, 2, 3); let b = i16x4::new(4, 3, 2, 1); let r = i16x4::new(0, 0, 0, -1); - assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b)); + assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into()))); } #[simd_test = "mmx"] @@ -166,7 +166,7 @@ mod tests { let a = i16x4::new(0, 1, 2, 3); let b = i16x4::new(4, 5, 6, 7); let r = i16x4::new(2, 6, 3, 7); - assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b)); + assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into()))); } #[simd_test = "mmx"] @@ -182,6 +182,6 @@ mod tests { let a = i16x4::new(0, 1, 2, 3); let b = i16x4::new(4, 5, 6, 7); let r = i16x4::new(0, 4, 1, 5); - assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b)); + assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into()))); } } diff --git a/library/stdarch/coresimd/src/x86/i686/sse.rs b/library/stdarch/coresimd/src/x86/i686/sse.rs index a47199f0732d..f6069a1f2f8a 100644 --- 
a/library/stdarch/coresimd/src/x86/i686/sse.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse.rs @@ -50,8 +50,8 @@ extern "C" { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _mm_max_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(pmaxsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 { + pmaxsw(a, b) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the @@ -59,7 +59,7 @@ pub unsafe fn _mm_max_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmaxsw))] -pub unsafe fn _m_pmaxsw(a: i16x4, b: i16x4) -> i16x4 { +pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 { _mm_max_pi16(a, b) } @@ -86,8 +86,8 @@ pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _mm_min_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(pminsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 { + pminsw(a, b) } /// Compares the packed 16-bit signed integers of `a` and `b` writing the @@ -95,7 +95,7 @@ pub unsafe fn _mm_min_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pminsw))] -pub unsafe fn _m_pminsw(a: i16x4, b: i16x4) -> i16x4 { +pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 { _mm_min_pi16(a, b) } @@ -123,8 +123,8 @@ pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _mm_mulhi_pu16(a: u16x4, b: u16x4) -> u16x4 { - mem::transmute(pmulhuw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 { + pmulhuw(a, b) } /// Multiplies packed 16-bit unsigned integer values and writes the @@ -133,7 +133,7 @@ pub unsafe fn _mm_mulhi_pu16(a: u16x4, b: 
u16x4) -> u16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmulhuw))] -pub unsafe fn _m_pmulhuw(a: u16x4, b: u16x4) -> u16x4 { +pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 { _mm_mulhi_pu16(a, b) } @@ -163,8 +163,8 @@ pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _mm_avg_pu16(a: u16x4, b: u16x4) -> u16x4 { - mem::transmute(pavgw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 { + pavgw(a, b) } /// Computes the rounded averages of the packed unsigned 16-bit integer @@ -173,7 +173,7 @@ pub unsafe fn _mm_avg_pu16(a: u16x4, b: u16x4) -> u16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pavgw))] -pub unsafe fn _m_pavgw(a: u16x4, b: u16x4) -> u16x4 { +pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 { _mm_avg_pu16(a, b) } @@ -225,7 +225,7 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 { /// float]. #[inline(always)] #[target_feature = "+sse"] -pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 { +pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a); let c = mmx::_mm_unpackhi_pi16(a, b); @@ -240,14 +240,14 @@ pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 { /// 128-bit vector of [4 x float]. 
#[inline(always)] #[target_feature = "+sse"] -pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 { - let b = mem::transmute(mmx::_mm_setzero_si64()); - let c = mmx::_mm_unpackhi_pi16(a.as_i16x4(), b); +pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let c = mmx::_mm_unpackhi_pi16(a, b); let r = i586::_mm_setzero_ps(); - let r = cvtpi2ps(r, mem::transmute(c)); + let r = cvtpi2ps(r, c); let r = i586::_mm_movelh_ps(r, r); - let c = mmx::_mm_unpacklo_pi16(a.as_i16x4(), b); - cvtpi2ps(r, mem::transmute(c)) + let c = mmx::_mm_unpacklo_pi16(a, b); + cvtpi2ps(r, c) } /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] @@ -256,9 +256,9 @@ pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 { #[target_feature = "+sse"] pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a); + let b = mmx::_mm_cmpgt_pi8(b, a); let b = mmx::_mm_unpacklo_pi8(a, b); - _mm_cvtpi16_ps(mem::transmute(b)) + _mm_cvtpi16_ps(b) } /// Converts the lower four unsigned 8-bit integer values from a 64-bit @@ -267,8 +267,8 @@ pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 { #[target_feature = "+sse"] pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_unpacklo_pi8(a, mem::transmute(b)); - _mm_cvtpi16_ps(mem::transmute(b)) + let b = mmx::_mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(b) } /// Converts the two 32-bit signed integer values from each 64-bit vector @@ -338,9 +338,9 @@ pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 { +pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 { macro_rules! 
call { - ($imm2:expr) => { mem::transmute(pinsrw(mem::transmute(a), d, $imm2)) } + ($imm2:expr) => { pinsrw(a, d, $imm2) } } constify_imm2!(imm2, call) } @@ -351,7 +351,7 @@ pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))] -pub unsafe fn _m_pinsrw(a: i16x4, d: i32, imm2: i32) -> i16x4 { +pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 { _mm_insert_pi16(a, d, imm2) } @@ -380,9 +380,9 @@ pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i32) -> i16x4 { +pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 { macro_rules! call { - ($imm8:expr) => { mem::transmute(pshufw(mem::transmute(a), $imm8)) } + ($imm8:expr) => { pshufw(a, $imm8) } } constify_imm8!(imm8, call) } @@ -392,7 +392,7 @@ pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i32) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pshufw, imm8 = 0))] -pub unsafe fn _m_pshufw(a: i16x4, imm8: i32) -> i16x4 { +pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 { _mm_shuffle_pi16(a, imm8) } @@ -419,8 +419,8 @@ pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 { - mem::transmute(cvtps2pi(a)) +pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> __m64 { + cvtps2pi(a) } /// Convert the two lower packed single-precision (32-bit) floating-point @@ -428,7 +428,7 @@ pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { +pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> __m64 { _mm_cvtps_pi32(a) } @@ -437,7 +437,7 @@ pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 { 
#[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 { +pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> __m64 { let b = _mm_cvtps_pi32(a); let a = i586::_mm_movehl_ps(a, a); let c = _mm_cvtps_pi32(a); @@ -450,10 +450,10 @@ pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(cvtps2pi))] -pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 { +pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> __m64 { let b = _mm_cvtps_pi16(a); let c = mmx::_mm_setzero_si64(); - mmx::_mm_packs_pi16(b, mem::transmute(c)) + mmx::_mm_packs_pi16(b, c) } #[cfg(test)] @@ -472,8 +472,8 @@ mod tests { let b = i16x4::new(5, -2, 7, -4); let r = i16x4::new(5, 6, 7, 8); - assert_eq!(r, sse::_mm_max_pi16(a, b)); - assert_eq!(r, sse::_m_pmaxsw(a, b)); + assert_eq!(r, i16x4::from(sse::_mm_max_pi16(a.into(), b.into()))); + assert_eq!(r, i16x4::from(sse::_m_pmaxsw(a.into(), b.into()))); } #[simd_test = "sse"] @@ -492,8 +492,8 @@ mod tests { let b = i16x4::new(5, -2, 7, -4); let r = i16x4::new(-1, -2, -3, -4); - assert_eq!(r, sse::_mm_min_pi16(a, b)); - assert_eq!(r, sse::_m_pminsw(a, b)); + assert_eq!(r, i16x4::from(sse::_mm_min_pi16(a.into(), b.into()))); + assert_eq!(r, i16x4::from(sse::_m_pminsw(a.into(), b.into()))); } #[simd_test = "sse"] @@ -509,7 +509,7 @@ mod tests { #[simd_test = "sse"] unsafe fn _mm_mulhi_pu16() { let (a, b) = (u16x4::splat(1000), u16x4::splat(1001)); - let r = sse::_mm_mulhi_pu16(a, b); + let r = u16x4::from(sse::_mm_mulhi_pu16(a.into(), b.into())); assert_eq!(r, u16x4::splat(15)); } @@ -526,10 +526,10 @@ mod tests { #[simd_test = "sse"] unsafe fn _mm_avg_pu16() { let (a, b) = (u16x4::splat(3), u16x4::splat(9)); - let r = sse::_mm_avg_pu16(a, b); + let r = u16x4::from(sse::_mm_avg_pu16(a.into(), b.into())); assert_eq!(r, u16x4::splat(6)); - let r = sse::_m_pavgw(a, b); + let r = u16x4::from(sse::_m_pavgw(a.into(), b.into())); assert_eq!(r, 
u16x4::splat(6)); } @@ -561,7 +561,7 @@ mod tests { unsafe fn _mm_cvtpi16_ps() { let a = i16x4::new(1, 2, 3, 4); let expected = f32x4::new(1., 2., 3., 4.); - let r = sse::_mm_cvtpi16_ps(a); + let r = sse::_mm_cvtpi16_ps(a.into()); assert_eq!(r, expected); } @@ -569,7 +569,7 @@ mod tests { unsafe fn _mm_cvtpu16_ps() { let a = u16x4::new(1, 2, 3, 4); let expected = f32x4::new(1., 2., 3., 4.); - let r = sse::_mm_cvtpu16_ps(a); + let r = sse::_mm_cvtpu16_ps(a.into()); assert_eq!(r, expected); } @@ -626,14 +626,14 @@ mod tests { #[simd_test = "sse"] unsafe fn _mm_insert_pi16() { let a = i16x4::new(1, 2, 3, 4); - let r = sse::_mm_insert_pi16(a, 0, 0b0); + let r = i16x4::from(sse::_mm_insert_pi16(a.into(), 0, 0b0)); let expected = i16x4::new(0, 2, 3, 4); assert_eq!(r, expected); - let r = sse::_mm_insert_pi16(a, 0, 0b10); + let r = i16x4::from(sse::_mm_insert_pi16(a.into(), 0, 0b10)); let expected = i16x4::new(1, 2, 0, 4); assert_eq!(r, expected); - let r = sse::_m_pinsrw(a, 0, 0b10); + let r = i16x4::from(sse::_m_pinsrw(a.into(), 0, 0b10)); assert_eq!(r, expected); } @@ -650,11 +650,11 @@ mod tests { #[simd_test = "sse"] unsafe fn _mm_shuffle_pi16() { let a = i16x4::new(1, 2, 3, 4); - let r = sse::_mm_shuffle_pi16(a, 0b00_01_01_11); + let r = i16x4::from(sse::_mm_shuffle_pi16(a.into(), 0b00_01_01_11)); let expected = i16x4::new(4, 2, 2, 1); assert_eq!(r, expected); - let r = sse::_m_pshufw(a, 0b00_01_01_11); + let r = i16x4::from(sse::_m_pshufw(a.into(), 0b00_01_01_11)); assert_eq!(r, expected); } @@ -663,8 +663,8 @@ mod tests { let a = f32x4::new(1.0, 2.0, 3.0, 4.0); let r = i32x2::new(1, 2); - assert_eq!(r, sse::_mm_cvtps_pi32(a)); - assert_eq!(r, sse::_mm_cvt_ps2pi(a)); + assert_eq!(r, i32x2::from(sse::_mm_cvtps_pi32(a))); + assert_eq!(r, i32x2::from(sse::_mm_cvt_ps2pi(a))); } #[simd_test = "sse"] @@ -680,13 +680,13 @@ mod tests { unsafe fn _mm_cvtps_pi16() { let a = f32x4::new(7.0, 2.0, 3.0, 4.0); let r = i16x4::new(7, 2, 3, 4); - assert_eq!(r, 
sse::_mm_cvtps_pi16(a)); + assert_eq!(r, i16x4::from(sse::_mm_cvtps_pi16(a))); } #[simd_test = "sse"] unsafe fn _mm_cvtps_pi8() { let a = f32x4::new(7.0, 2.0, 3.0, 4.0); let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0); - assert_eq!(r, sse::_mm_cvtps_pi8(a)); + assert_eq!(r, i8x8::from(sse::_mm_cvtps_pi8(a))); } } diff --git a/library/stdarch/coresimd/src/x86/i686/ssse3.rs b/library/stdarch/coresimd/src/x86/i686/ssse3.rs index 58f484c37687..bf31cbb647b2 100644 --- a/library/stdarch/coresimd/src/x86/i686/ssse3.rs +++ b/library/stdarch/coresimd/src/x86/i686/ssse3.rs @@ -3,7 +3,6 @@ #[cfg(test)] use stdsimd_test::assert_instr; -use core::mem; use v64::*; /// Compute the absolute value of packed 8-bit integers in `a` and @@ -29,8 +28,8 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pabsd))] -pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 { - mem::transmute(pabsd(mem::transmute(a))) +pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 { + pabsd(a) } /// Shuffle packed 8-bit integers in `a` according to shuffle control mask in @@ -70,8 +69,8 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phaddd))] -pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 { - mem::transmute(phaddd(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 { + phaddd(a, b) } /// Horizontally add the adjacent pairs of values contained in 2 packed @@ -98,8 +97,8 @@ pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phsubd))] -pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 { - mem::transmute(phsubd(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 { + phsubd(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -164,8 +163,8 @@ pub 
unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(psignd))] -pub unsafe fn _mm_sign_pi32(a: i32x2, b: i32x2) -> i32x2 { - mem::transmute(psignd(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 { + psignd(a, b) } #[allow(improper_ctypes)] @@ -240,7 +239,7 @@ mod tests { #[simd_test = "ssse3"] unsafe fn _mm_abs_pi32() { - let r = ssse3::_mm_abs_pi32(i32x2::splat(-5)); + let r = u32x2::from(ssse3::_mm_abs_pi32(i32x2::splat(-5).into())); assert_eq!(r, u32x2::splat(5)); } @@ -275,7 +274,7 @@ mod tests { let a = i32x2::new(1, 2); let b = i32x2::new(4, 128); let expected = i32x2::new(3, 132); - let r = ssse3::_mm_hadd_pi32(a, b); + let r = i32x2::from(ssse3::_mm_hadd_pi32(a.into(), b.into())); assert_eq!(r, expected); } @@ -302,7 +301,7 @@ mod tests { let a = i32x2::new(1, 2); let b = i32x2::new(4, 128); let expected = i32x2::new(-1, -124); - let r = ssse3::_mm_hsub_pi32(a, b); + let r = i32x2::from(ssse3::_mm_hsub_pi32(a.into(), b.into())); assert_eq!(r, expected); } @@ -356,7 +355,7 @@ mod tests { let a = i32x2::new(-1, 2); let b = i32x2::new(1, 0); let expected = i32x2::new(-1, 0); - let r = ssse3::_mm_sign_pi32(a, b); + let r = i32x2::from(ssse3::_mm_sign_pi32(a.into(), b.into())); assert_eq!(r, expected); } } diff --git a/library/stdarch/stdsimd-test/src/lib.rs b/library/stdarch/stdsimd-test/src/lib.rs index 309ad9d587f6..ccd5bb58e766 100644 --- a/library/stdarch/stdsimd-test/src/lib.rs +++ b/library/stdarch/stdsimd-test/src/lib.rs @@ -323,7 +323,12 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { break; } - let instruction_limit = 30; + let instruction_limit = match expected { + // cpuid returns a pretty big aggregate structure so exempt it from the + // slightly more restrictive 20 instructions below + "cpuid" => 30, + _ => 20, + }; let probably_only_one_instruction = function.instrs.len() < instruction_limit;