Assert intrinsic implementations are inlined properly (#261)

* assert_instr check for failed inlining

* Fix `call` instructions showing up in some intrinsics

The ABI of types like `u8x8` as they're defined isn't actually the underlying
type we need for LLVM, but only `__m64` currently satisfies that. Apparently
this (and the casts involved) caused some extraneous instructions for a number
of intrinsics. They've all moved over to the `__m64` type now to ensure that
they match the underlying interface.

* Allow PIC-relative `call` instructions on x86

These should be harmless when evaluating whether we failed inlining.
This commit is contained in:
Alex Crichton 2018-01-03 16:37:45 -06:00 committed by GitHub
parent acc8d3de10
commit 07ebce51b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 118 additions and 80 deletions

View file

@ -56,8 +56,8 @@ pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 {
mem::transmute(pcmpgtb(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
pcmpgtb(a, b)
}
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
@ -86,8 +86,8 @@ pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 {
mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
punpcklbw(a, b)
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
@ -150,7 +150,7 @@ mod tests {
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b));
assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into())));
}
#[simd_test = "mmx"]
@ -174,7 +174,7 @@ mod tests {
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
}
#[simd_test = "mmx"]

View file

@ -68,8 +68,8 @@ pub unsafe fn _m_pmaxsw(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 {
mem::transmute(pmaxub(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
pmaxub(a, b)
}
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
@ -77,7 +77,7 @@ pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: u8x8, b: u8x8) -> u8x8 {
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
_mm_max_pu8(a, b)
}
@ -104,8 +104,8 @@ pub unsafe fn _m_pminsw(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 {
mem::transmute(pminub(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
pminub(a, b)
}
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
@ -113,7 +113,7 @@ pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
_mm_min_pu8(a, b)
}
@ -143,8 +143,8 @@ pub unsafe fn _m_pmulhuw(a: u16x4, b: u16x4) -> u16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 {
mem::transmute(pavgb(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
pavgb(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
@ -153,7 +153,7 @@ pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: u8x8, b: u8x8) -> u8x8 {
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu8(a, b)
}
@ -184,8 +184,8 @@ pub unsafe fn _m_pavgw(a: u16x4, b: u16x4) -> u16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 {
mem::transmute(psadbw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
psadbw(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
@ -195,8 +195,8 @@ pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: u8x8, b: u8x8) -> __m64 {
mem::transmute(_mm_sad_pu8(a, b))
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
_mm_sad_pu8(a, b)
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
@ -254,7 +254,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 {
/// into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 {
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a);
let b = mmx::_mm_unpacklo_pi8(a, b);
@ -265,9 +265,9 @@ pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 {
/// vector of [8 x u8] into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu8_ps(a: u8x8) -> f32x4 {
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let b = mmx::_mm_unpacklo_pi8(a.as_i8x8(), mem::transmute(b));
let b = mmx::_mm_unpacklo_pi8(a, mem::transmute(b));
_mm_cvtpi16_ps(mem::transmute(b))
}
@ -293,8 +293,8 @@ pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> f32x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
maskmovq(mem::transmute(a), mem::transmute(mask), mem_addr)
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
maskmovq(a, mask, mem_addr)
}
/// Conditionally copies the values from each 8-bit element in the first
@ -307,7 +307,7 @@ pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
_mm_maskmove_si64(a, mask, mem_addr)
}
@ -482,8 +482,8 @@ mod tests {
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8);
assert_eq!(r, sse::_mm_max_pu8(a, b));
assert_eq!(r, sse::_m_pmaxub(a, b));
assert_eq!(r, u8x8::from(sse::_mm_max_pu8(a.into(), b.into())));
assert_eq!(r, u8x8::from(sse::_m_pmaxub(a.into(), b.into())));
}
#[simd_test = "sse"]
@ -502,8 +502,8 @@ mod tests {
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4);
assert_eq!(r, sse::_mm_min_pu8(a, b));
assert_eq!(r, sse::_m_pminub(a, b));
assert_eq!(r, u8x8::from(sse::_mm_min_pu8(a.into(), b.into())));
assert_eq!(r, u8x8::from(sse::_m_pminub(a.into(), b.into())));
}
#[simd_test = "sse"]
@ -516,10 +516,10 @@ mod tests {
#[simd_test = "sse"]
unsafe fn _mm_avg_pu8() {
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
let r = sse::_mm_avg_pu8(a, b);
let r = u8x8::from(sse::_mm_avg_pu8(a.into(), b.into()));
assert_eq!(r, u8x8::splat(6));
let r = sse::_m_pavgb(a, b);
let r = u8x8::from(sse::_m_pavgb(a.into(), b.into()));
assert_eq!(r, u8x8::splat(6));
}
@ -538,10 +538,10 @@ mod tests {
unsafe fn _mm_sad_pu8() {
let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4);
let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1);
let r = sse::_mm_sad_pu8(a, b);
let r = sse::_mm_sad_pu8(a.into(), b.into());
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
let r = sse::_m_psadbw(a, b);
let r = sse::_m_psadbw(a.into(), b.into());
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
}
@ -577,7 +577,7 @@ mod tests {
unsafe fn _mm_cvtpi8_ps() {
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let expected = f32x4::new(1., 2., 3., 4.);
let r = sse::_mm_cvtpi8_ps(a);
let r = sse::_mm_cvtpi8_ps(a.into());
assert_eq!(r, expected);
}
@ -585,7 +585,7 @@ mod tests {
unsafe fn _mm_cvtpu8_ps() {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let expected = f32x4::new(1., 2., 3., 4.);
let r = sse::_mm_cvtpu8_ps(a);
let r = sse::_mm_cvtpu8_ps(a.into());
assert_eq!(r, expected);
}
@ -603,11 +603,11 @@ mod tests {
let a = i8x8::splat(9);
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
let mut r = i8x8::splat(0);
sse::_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
assert_eq!(r, i8x8::splat(0).replace(2, 9));
let mut r = i8x8::splat(0);
sse::_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
sse::_m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
assert_eq!(r, i8x8::splat(0).replace(2, 9));
}

View file

@ -32,7 +32,7 @@ extern "C" {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
ptestz(a.into(), mask.into())
ptestz(i64x2::from(a), i64x2::from(mask))
}
/// Tests whether the specified bits in a 128-bit integer vector are all
@ -52,7 +52,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestc(a.into(), mask.into())
ptestc(i64x2::from(a), i64x2::from(mask))
}
/// Tests whether the specified bits in a 128-bit integer vector are
@ -72,7 +72,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
#[target_feature = "+sse4.1"]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestnzc(a.into(), mask.into())
ptestnzc(i64x2::from(a), i64x2::from(mask))
}
/// Tests whether the specified bits in a 128-bit integer vector are all
@ -111,7 +111,8 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
_mm_testc_si128(a, ::x86::_mm_cmpeq_epi32(a.into(), a.into()).into())
let b = i32x4::from(a);
_mm_testc_si128(a, __m128i::from(::x86::_mm_cmpeq_epi32(b, b)))
}
/// Tests whether the specified bits in a 128-bit integer vector are

View file

@ -11,8 +11,8 @@ use v64::*;
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 {
mem::transmute(pabsb(mem::transmute(a)))
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
pabsb(a)
}
/// Compute the absolute value of packed 8-bit integers in `a`, and return the
@ -20,8 +20,8 @@ pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: i16x4) -> u16x4 {
mem::transmute(pabsw(mem::transmute(a)))
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
pabsw(a)
}
/// Compute the absolute value of packed 32-bit integers in `a`, and return the
@ -38,8 +38,8 @@ pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 {
mem::transmute(pshufb(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
pshufb(a, b)
}
/// Concatenates the two 64-bit integer vector operands, and right-shifts
@ -47,10 +47,10 @@ pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 {
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
mem::transmute(palignrb(mem::transmute(a), mem::transmute(b), $imm8))
palignrb(a, b, $imm8)
}
}
constify_imm8!(n, call)
@ -61,8 +61,8 @@ pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phaddw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
phaddw(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
@ -80,8 +80,8 @@ pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phaddsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
phaddsw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@ -89,8 +89,8 @@ pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phsubw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
phsubw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@ -109,8 +109,8 @@ pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
phsubsw(a, b)
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
@ -121,8 +121,8 @@ pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 {
mem::transmute(pmaddubsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
pmaddubsw(a, b)
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
@ -131,8 +131,8 @@ pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(pmulhrsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
pmulhrsw(a, b)
}
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
@ -142,8 +142,8 @@ pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 {
mem::transmute(psignb(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
psignb(a, b)
}
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
@ -153,8 +153,8 @@ pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(psignw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
psignw(a, b)
}
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
@ -228,13 +228,13 @@ mod tests {
#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi8() {
let r = ssse3::_mm_abs_pi8(i8x8::splat(-5));
let r = u8x8::from(ssse3::_mm_abs_pi8(i8x8::splat(-5).into()));
assert_eq!(r, u8x8::splat(5));
}
#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi16() {
let r = ssse3::_mm_abs_pi16(i16x4::splat(-5));
let r = u16x4::from(ssse3::_mm_abs_pi16(i16x4::splat(-5).into()));
assert_eq!(r, u16x4::splat(5));
}
@ -249,7 +249,7 @@ mod tests {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19);
let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4);
let r = ssse3::_mm_shuffle_pi8(a, b);
let r = u8x8::from(ssse3::_mm_shuffle_pi8(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -257,7 +257,7 @@ mod tests {
unsafe fn _mm_alignr_pi8() {
let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32);
let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32);
let r = ssse3::_mm_alignr_pi8(u8x8::from(a), u8x8::from(b), 4);
let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4);
assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
}
@ -266,7 +266,7 @@ mod tests {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(3, 7, 132, 7);
let r = ssse3::_mm_hadd_pi16(a, b);
let r = i16x4::from(ssse3::_mm_hadd_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -284,7 +284,7 @@ mod tests {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(32767, 1, -32768, -1);
let expected = i16x4::new(3, 7, 32767, -32768);
let r = ssse3::_mm_hadds_pi16(a, b);
let r = i16x4::from(ssse3::_mm_hadds_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -293,7 +293,7 @@ mod tests {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(-1, -1, -124, 1);
let r = ssse3::_mm_hsub_pi16(a, b);
let r = i16x4::from(ssse3::_mm_hsub_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -311,7 +311,7 @@ mod tests {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 128, 4, 3);
let expected = i16x4::new(-1, -1, -124, 1);
let r = ssse3::_mm_hsubs_pi16(a, b);
let r = i16x4::from(ssse3::_mm_hsubs_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -320,7 +320,7 @@ mod tests {
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19);
let expected = i16x4::new(130, 24, 192, 194);
let r = ssse3::_mm_maddubs_pi16(a, b);
let r = i16x4::from(ssse3::_mm_maddubs_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -329,7 +329,7 @@ mod tests {
let a = i16x4::new(1, 2, 3, 4);
let b = i16x4::new(4, 32767, -1, -32768);
let expected = i16x4::new(0, 2, 0, -4);
let r = ssse3::_mm_mulhrs_pi16(a, b);
let r = i16x4::from(ssse3::_mm_mulhrs_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -338,7 +338,7 @@ mod tests {
let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8);
let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1);
let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8);
let r = ssse3::_mm_sign_pi8(a, b);
let r = i8x8::from(ssse3::_mm_sign_pi8(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -347,7 +347,7 @@ mod tests {
let a = i16x4::new(-1, 2, 3, 4);
let b = i16x4::new(1, -1, 1, 0);
let expected = i16x4::new(-1, -2, 3, 0);
let r = ssse3::_mm_sign_pi16(a, b);
let r = i16x4::from(ssse3::_mm_sign_pi16(a.into(), b.into()));
assert_eq!(r, expected);
}

View file

@ -293,9 +293,41 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
}
}
let probably_only_one_instruction = function.instrs.len() < 30;
// Look for `call` instructions in the disassembly to detect whether
// inlining failed: all intrinsics are `#[inline(always)]`, so
// calling one intrinsic from another should not generate `call`
// instructions.
let mut inlining_failed = false;
for (i, instr) in function.instrs.iter().enumerate() {
let part = match instr.parts.get(0) {
Some(part) => part,
None => continue,
};
if !part.contains("call") {
continue
}
if found && probably_only_one_instruction {
// On 32-bit x86 position independent code will call itself and be
// immediately followed by a `pop` to learn about the current address.
// Let's not take that into account when considering whether a function
// failed inlining something.
let followed_by_pop = function.instrs.get(i + 1)
.and_then(|i| i.parts.get(0))
.map(|s| s.contains("pop"))
.unwrap_or(false);
if followed_by_pop && cfg!(target_arch = "x86") {
continue
}
inlining_failed = true;
break;
}
let instruction_limit = 30;
let probably_only_one_instruction =
function.instrs.len() < instruction_limit;
if found && probably_only_one_instruction && !inlining_failed {
return;
}
@ -319,7 +351,12 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
expected
);
} else if !probably_only_one_instruction {
panic!("too many instructions in the disassembly");
panic!("instruction found, but the disassembly contains too many \
instructions: #instructions = {} >= {} (limit)",
function.instrs.len(), instruction_limit);
} else if inlining_failed {
panic!("instruction found, but the disassembly contains `call` \
instructions, which hint that inlining failed");
}
}