diff --git a/library/stdarch/coresimd/src/x86/i686/mmx.rs b/library/stdarch/coresimd/src/x86/i686/mmx.rs index 476ad957e6dd..e6a06f0a1fb9 100644 --- a/library/stdarch/coresimd/src/x86/i686/mmx.rs +++ b/library/stdarch/coresimd/src/x86/i686/mmx.rs @@ -56,8 +56,8 @@ pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 { #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(pcmpgtb))] -pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 { - mem::transmute(pcmpgtb(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 { + pcmpgtb(a, b) } /// Compares the 16-bit integer elements of two 64-bit integer vectors of @@ -86,8 +86,8 @@ pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+mmx"] #[cfg_attr(test, assert_instr(punpcklbw))] -pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 { - mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 { + punpcklbw(a, b) } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -150,7 +150,7 @@ mod tests { let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1); - assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b)); + assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into()))); } #[simd_test = "mmx"] @@ -174,7 +174,7 @@ mod tests { let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15); let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11); - assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b)); + assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into()))); } #[simd_test = "mmx"] diff --git a/library/stdarch/coresimd/src/x86/i686/sse.rs b/library/stdarch/coresimd/src/x86/i686/sse.rs index 92e3e110bc1c..a47199f0732d 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse.rs @@ -68,8 +68,8 @@ pub unsafe fn _m_pmaxsw(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 { - mem::transmute(pmaxub(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 { + pmaxub(a, b) } /// Compares the packed 8-bit signed integers of `a` and `b` writing the @@ -77,7 +77,7 @@ pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pmaxub))] -pub unsafe fn _m_pmaxub(a: u8x8, b: u8x8) -> u8x8 { +pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 { _mm_max_pu8(a, b) } @@ -104,8 +104,8 @@ pub unsafe fn _m_pminsw(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 { - mem::transmute(pminub(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 { + pminub(a, b) } /// Compares the packed 8-bit signed integers of `a` and `b` writing the @@ -113,7 +113,7 @@ pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pminub))] -pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 { +pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 { _mm_min_pu8(a, b) } @@ -143,8 +143,8 @@ pub unsafe fn _m_pmulhuw(a: u16x4, b: u16x4) -> u16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 { - mem::transmute(pavgb(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 { + pavgb(a, b) } /// Computes the rounded averages of the packed unsigned 8-bit integer @@ -153,7 +153,7 @@ pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(pavgb))] -pub unsafe fn _m_pavgb(a: u8x8, b: u8x8) -> u8x8 { +pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 { _mm_avg_pu8(a, b) } @@ -184,8 +184,8 @@ pub unsafe fn _m_pavgw(a: u16x4, b: u16x4) -> u16x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 { - mem::transmute(psadbw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 { + psadbw(a, b) } /// Subtracts the corresponding 8-bit unsigned integer values of the two @@ -195,8 +195,8 @@ pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(psadbw))] -pub unsafe fn _m_psadbw(a: u8x8, b: u8x8) -> __m64 { - mem::transmute(_mm_sad_pu8(a, b)) +pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 { + _mm_sad_pu8(a, b) } /// Converts two elements of a 64-bit vector of [2 x i32] into two @@ -254,7 +254,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 { /// into a 128-bit vector of [4 x float]. #[inline(always)] #[target_feature = "+sse"] -pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 { +pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a); let b = mmx::_mm_unpacklo_pi8(a, b); @@ -265,9 +265,9 @@ pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 { /// vector of [8 x u8] into a 128-bit vector of [4 x float]. #[inline(always)] #[target_feature = "+sse"] -pub unsafe fn _mm_cvtpu8_ps(a: u8x8) -> f32x4 { +pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 { let b = mmx::_mm_setzero_si64(); - let b = mmx::_mm_unpacklo_pi8(a.as_i8x8(), mem::transmute(b)); + let b = mmx::_mm_unpacklo_pi8(a, mem::transmute(b)); _mm_cvtpi16_ps(mem::transmute(b)) } @@ -293,8 +293,8 @@ pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> f32x4 { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) { - maskmovq(mem::transmute(a), mem::transmute(mask), mem_addr) +pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) { + maskmovq(a, mask, mem_addr) } /// Conditionally copies the values from each 8-bit element in the first @@ -307,7 +307,7 @@ pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) { #[inline(always)] #[target_feature = "+sse"] #[cfg_attr(test, assert_instr(maskmovq))] -pub unsafe fn _m_maskmovq(a: i8x8, mask: i8x8, mem_addr: *mut i8) { +pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) { _mm_maskmove_si64(a, mask, mem_addr) } @@ -482,8 +482,8 @@ mod tests { let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4); let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8); - assert_eq!(r, sse::_mm_max_pu8(a, b)); - assert_eq!(r, sse::_m_pmaxub(a, b)); + assert_eq!(r, u8x8::from(sse::_mm_max_pu8(a.into(), b.into()))); + assert_eq!(r, u8x8::from(sse::_m_pmaxub(a.into(), b.into()))); } #[simd_test = "sse"] @@ -502,8 +502,8 @@ mod tests { let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4); let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4); - assert_eq!(r, sse::_mm_min_pu8(a, b)); - assert_eq!(r, sse::_m_pminub(a, b)); + assert_eq!(r, u8x8::from(sse::_mm_min_pu8(a.into(), b.into()))); + assert_eq!(r, u8x8::from(sse::_m_pminub(a.into(), b.into()))); } #[simd_test = "sse"] @@ -516,10 +516,10 @@ mod tests { #[simd_test = "sse"] unsafe fn _mm_avg_pu8() { let (a, b) = (u8x8::splat(3), u8x8::splat(9)); - let r = sse::_mm_avg_pu8(a, b); + let r = u8x8::from(sse::_mm_avg_pu8(a.into(), b.into())); assert_eq!(r, u8x8::splat(6)); - let r = sse::_m_pavgb(a, b); + let r = u8x8::from(sse::_m_pavgb(a.into(), b.into())); assert_eq!(r, u8x8::splat(6)); } @@ -538,10 +538,10 @@ mod tests { unsafe fn _mm_sad_pu8() { let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4); let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1); - let r = sse::_mm_sad_pu8(a, b); + let r = sse::_mm_sad_pu8(a.into(), b.into()); assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0))); - let r = sse::_m_psadbw(a, b); + let r = sse::_m_psadbw(a.into(), b.into()); assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0))); } @@ -577,7 +577,7 @@ mod tests { unsafe fn _mm_cvtpi8_ps() { let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let expected = f32x4::new(1., 2., 3., 4.); - let r = sse::_mm_cvtpi8_ps(a); + let r = sse::_mm_cvtpi8_ps(a.into()); assert_eq!(r, expected); } @@ -585,7 +585,7 @@ mod tests { unsafe fn _mm_cvtpu8_ps() { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let expected = f32x4::new(1., 2., 3., 4.); - let r = sse::_mm_cvtpu8_ps(a); + let r = sse::_mm_cvtpu8_ps(a.into()); assert_eq!(r, expected); } @@ -603,11 +603,11 @@ mod tests { let a = i8x8::splat(9); let mask = i8x8::splat(0).replace(2, 0x80u8 as i8); let mut r = i8x8::splat(0); - sse::_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8); + sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8); assert_eq!(r, i8x8::splat(0).replace(2, 9)); let mut r = i8x8::splat(0); - sse::_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8); + sse::_m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8); assert_eq!(r, i8x8::splat(0).replace(2, 9)); } diff --git a/library/stdarch/coresimd/src/x86/i686/sse41.rs b/library/stdarch/coresimd/src/x86/i686/sse41.rs index 9a9810b91612..16e767ff9791 100644 --- a/library/stdarch/coresimd/src/x86/i686/sse41.rs +++ b/library/stdarch/coresimd/src/x86/i686/sse41.rs @@ -32,7 +32,7 @@ extern "C" { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { - ptestz(a.into(), mask.into()) + ptestz(i64x2::from(a), i64x2::from(mask)) } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -52,7 +52,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestc(a.into(), mask.into()) + ptestc(i64x2::from(a), i64x2::from(mask)) } /// Tests whether the specified bits in a 128-bit integer vector are @@ -72,7 +72,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 { #[target_feature = "+sse4.1"] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 { - ptestnzc(a.into(), mask.into()) + ptestnzc(i64x2::from(a), i64x2::from(mask)) } /// Tests whether the specified bits in a 128-bit integer vector are all @@ -111,7 +111,8 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 { #[cfg_attr(test, assert_instr(pcmpeqd))] #[cfg_attr(test, assert_instr(ptest))] pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 { - _mm_testc_si128(a, ::x86::_mm_cmpeq_epi32(a.into(), a.into()).into()) + let b = i32x4::from(a); + _mm_testc_si128(a, __m128i::from(::x86::_mm_cmpeq_epi32(b, b))) } /// Tests whether the specified bits in a 128-bit integer vector are diff --git a/library/stdarch/coresimd/src/x86/i686/ssse3.rs b/library/stdarch/coresimd/src/x86/i686/ssse3.rs index ac20ac748c15..58f484c37687 100644 --- a/library/stdarch/coresimd/src/x86/i686/ssse3.rs +++ b/library/stdarch/coresimd/src/x86/i686/ssse3.rs @@ -11,8 +11,8 @@ use v64::*; #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pabsb))] -pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 { - mem::transmute(pabsb(mem::transmute(a))) +pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 { + pabsb(a) } /// Compute the absolute value of packed 8-bit integers in `a`, and return the @@ -20,8 +20,8 @@ pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pabsw))] -pub unsafe fn _mm_abs_pi16(a: i16x4) -> u16x4 { - mem::transmute(pabsw(mem::transmute(a))) +pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 { + pabsw(a) } /// Compute the absolute value of packed 32-bit integers in `a`, and return the @@ -38,8 +38,8 @@ pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pshufb))] -pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 { - mem::transmute(pshufb(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 { + pshufb(a, b) } /// Concatenates the two 64-bit integer vector operands, and right-shifts @@ -47,10 +47,10 @@ pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(palignr, n = 15))] -pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 { +pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 { macro_rules! call { ($imm8:expr) => { - mem::transmute(palignrb(mem::transmute(a), mem::transmute(b), $imm8)) + palignrb(a, b, $imm8) } } constify_imm8!(n, call) @@ -61,8 +61,8 @@ pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phaddw))] -pub unsafe fn _mm_hadd_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(phaddw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 { + phaddw(a, b) } /// Horizontally add the adjacent pairs of values contained in 2 packed @@ -80,8 +80,8 @@ pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phaddsw))] -pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(phaddsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 { + phaddsw(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -89,8 +89,8 @@ pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phsubw))] -pub unsafe fn _mm_hsub_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(phsubw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 { + phsubw(a, b) } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -109,8 +109,8 @@ pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(phsubsw))] -pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 { + phsubsw(a, b) } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -121,8 +121,8 @@ pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pmaddubsw))] -pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 { - mem::transmute(pmaddubsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 { + pmaddubsw(a, b) } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -131,8 +131,8 @@ pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(pmulhrsw))] -pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(pmulhrsw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 { + pmulhrsw(a, b) } /// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit @@ -142,8 +142,8 @@ pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(psignb))] -pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 { - mem::transmute(psignb(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 { + psignb(a, b) } /// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit @@ -153,8 +153,8 @@ pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 { #[inline(always)] #[target_feature = "+ssse3"] #[cfg_attr(test, assert_instr(psignw))] -pub unsafe fn _mm_sign_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(psignw(mem::transmute(a), mem::transmute(b))) +pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 { + psignw(a, b) } /// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit @@ -228,13 +228,13 @@ mod tests { #[simd_test = "ssse3"] unsafe fn _mm_abs_pi8() { - let r = ssse3::_mm_abs_pi8(i8x8::splat(-5)); + let r = u8x8::from(ssse3::_mm_abs_pi8(i8x8::splat(-5).into())); assert_eq!(r, u8x8::splat(5)); } #[simd_test = "ssse3"] unsafe fn _mm_abs_pi16() { - let r = ssse3::_mm_abs_pi16(i16x4::splat(-5)); + let r = u16x4::from(ssse3::_mm_abs_pi16(i16x4::splat(-5).into())); assert_eq!(r, u16x4::splat(5)); } @@ -249,7 +249,7 @@ mod tests { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19); let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4); - let r = ssse3::_mm_shuffle_pi8(a, b); + let r = u8x8::from(ssse3::_mm_shuffle_pi8(a.into(), b.into())); assert_eq!(r, expected); } @@ -257,7 +257,7 @@ mod tests { unsafe fn _mm_alignr_pi8() { let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32); let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32); - let r = ssse3::_mm_alignr_pi8(u8x8::from(a), u8x8::from(b), 4); + let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4); assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64)); } @@ -266,7 +266,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(4, 128, 4, 3); let expected = i16x4::new(3, 7, 132, 7); - let r = ssse3::_mm_hadd_pi16(a, b); + let r = i16x4::from(ssse3::_mm_hadd_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -284,7 +284,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(32767, 1, -32768, -1); let expected = i16x4::new(3, 7, 32767, -32768); - let r = ssse3::_mm_hadds_pi16(a, b); + let r = i16x4::from(ssse3::_mm_hadds_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -293,7 +293,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(4, 128, 4, 3); let expected = i16x4::new(-1, -1, -124, 1); - let r = ssse3::_mm_hsub_pi16(a, b); + let r = i16x4::from(ssse3::_mm_hsub_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -311,7 +311,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(4, 128, 4, 3); let expected = i16x4::new(-1, -1, -124, 1); - let r = ssse3::_mm_hsubs_pi16(a, b); + let r = i16x4::from(ssse3::_mm_hsubs_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -320,7 +320,7 @@ mod tests { let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19); let expected = i16x4::new(130, 24, 192, 194); - let r = ssse3::_mm_maddubs_pi16(a, b); + let r = i16x4::from(ssse3::_mm_maddubs_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -329,7 +329,7 @@ mod tests { let a = i16x4::new(1, 2, 3, 4); let b = i16x4::new(4, 32767, -1, -32768); let expected = i16x4::new(0, 2, 0, -4); - let r = ssse3::_mm_mulhrs_pi16(a, b); + let r = i16x4::from(ssse3::_mm_mulhrs_pi16(a.into(), b.into())); assert_eq!(r, expected); } @@ -338,7 +338,7 @@ mod tests { let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8); let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1); let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8); - let r = ssse3::_mm_sign_pi8(a, b); + let r = i8x8::from(ssse3::_mm_sign_pi8(a.into(), b.into())); assert_eq!(r, expected); } @@ -347,7 +347,7 @@ mod tests { let a = i16x4::new(-1, 2, 3, 4); let b = i16x4::new(1, -1, 1, 0); let expected = i16x4::new(-1, -2, 3, 0); - let r = ssse3::_mm_sign_pi16(a, b); + let r = i16x4::from(ssse3::_mm_sign_pi16(a.into(), b.into())); assert_eq!(r, expected); } diff --git a/library/stdarch/stdsimd-test/src/lib.rs b/library/stdarch/stdsimd-test/src/lib.rs index 9e03eb5a671f..309ad9d587f6 100644 --- a/library/stdarch/stdsimd-test/src/lib.rs +++ b/library/stdarch/stdsimd-test/src/lib.rs @@ -293,9 +293,41 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { } } - let probably_only_one_instruction = function.instrs.len() < 30; + // Look for `call` instructions in the disassembly to detect whether + // inlining failed: all intrinsics are `#[inline(always)]`, so + // calling one intrinsic from another should not generate `call` + // instructions. + let mut inlining_failed = false; + for (i, instr) in function.instrs.iter().enumerate() { + let part = match instr.parts.get(0) { + Some(part) => part, + None => continue, + }; + if !part.contains("call") { + continue + } - if found && probably_only_one_instruction { + // On 32-bit x86 position independent code will call itself and be + // immediately followed by a `pop` to learn about the current address. + // Let's not take that into account when considering whether a function + // failed inlining something. + let followed_by_pop = function.instrs.get(i + 1) + .and_then(|i| i.parts.get(0)) + .map(|s| s.contains("pop")) + .unwrap_or(false); + if followed_by_pop && cfg!(target_arch = "x86") { + continue + } + + inlining_failed = true; + break; + } + + let instruction_limit = 30; + let probably_only_one_instruction = + function.instrs.len() < instruction_limit; + + if found && probably_only_one_instruction && !inlining_failed { return; } @@ -319,7 +351,12 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) { expected ); } else if !probably_only_one_instruction { - panic!("too many instructions in the disassembly"); + panic!("instruction found, but the disassembly contains too many \ + instructions: #instructions = {} >= {} (limit)", + function.instrs.len(), instruction_limit); + } else if inlining_failed { + panic!("instruction found, but the disassembly contains `call` \ + instructions, which hint that inlining failed"); } }