Assert intrinsic implementations are inlined properly (#261)
* assert_instr: check for failed inlining.
* Fix `call` instructions showing up in some intrinsics. The ABI of types like `u8x8`, as they're defined, isn't actually the underlying type we need for LLVM; only `__m64` currently satisfies that. Apparently this (and the casts involved) caused some extraneous instructions for a number of intrinsics. They've all been moved over to the `__m64` type now to ensure that their signatures match the underlying interface.
* Allow PIC-relative `call` instructions on x86. These should be harmless when evaluating whether inlining failed.
This commit is contained in:
parent
acc8d3de10
commit
07ebce51b8
5 changed files with 118 additions and 80 deletions
|
|
@ -56,8 +56,8 @@ pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtb))]
|
||||
pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 {
|
||||
mem::transmute(pcmpgtb(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
pcmpgtb(a, b)
|
||||
}
|
||||
|
||||
/// Compares the 16-bit integer elements of two 64-bit integer vectors of
|
||||
|
|
@ -86,8 +86,8 @@ pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+mmx"]
|
||||
#[cfg_attr(test, assert_instr(punpcklbw))]
|
||||
pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 {
|
||||
mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
punpcklbw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower 32 bits from two 64-bit integer vectors of
|
||||
|
|
@ -150,7 +150,7 @@ mod tests {
|
|||
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
|
||||
assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b));
|
||||
assert_eq!(r, i8x8::from(mmx::_mm_cmpgt_pi8(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
|
|
@ -174,7 +174,7 @@ mod tests {
|
|||
let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
|
||||
assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b));
|
||||
assert_eq!(r, i8x8::from(mmx::_mm_unpacklo_pi8(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "mmx"]
|
||||
|
|
|
|||
|
|
@ -68,8 +68,8 @@ pub unsafe fn _m_pmaxsw(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
||||
mem::transmute(pmaxub(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pmaxub(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
|
|
@ -77,7 +77,7 @@ pub unsafe fn _mm_max_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _m_pmaxub(a: u8x8, b: u8x8) -> u8x8 {
|
||||
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_max_pu8(a, b)
|
||||
}
|
||||
|
||||
|
|
@ -104,8 +104,8 @@ pub unsafe fn _m_pminsw(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
||||
mem::transmute(pminub(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pminub(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
|
|
@ -113,7 +113,7 @@ pub unsafe fn _mm_min_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _m_pminub(a: u8x8, b: u8x8) -> u8x8 {
|
||||
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_min_pu8(a, b)
|
||||
}
|
||||
|
||||
|
|
@ -143,8 +143,8 @@ pub unsafe fn _m_pmulhuw(a: u16x4, b: u16x4) -> u16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
||||
mem::transmute(pavgb(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pavgb(a, b)
|
||||
}
|
||||
|
||||
/// Computes the rounded averages of the packed unsigned 8-bit integer
|
||||
|
|
@ -153,7 +153,7 @@ pub unsafe fn _mm_avg_pu8(a: u8x8, b: u8x8) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _m_pavgb(a: u8x8, b: u8x8) -> u8x8 {
|
||||
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_avg_pu8(a, b)
|
||||
}
|
||||
|
||||
|
|
@ -184,8 +184,8 @@ pub unsafe fn _m_pavgw(a: u16x4, b: u16x4) -> u16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 {
|
||||
mem::transmute(psadbw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
psadbw(a, b)
|
||||
}
|
||||
|
||||
/// Subtracts the corresponding 8-bit unsigned integer values of the two
|
||||
|
|
@ -195,8 +195,8 @@ pub unsafe fn _mm_sad_pu8(a: u8x8, b: u8x8) -> __m64 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _m_psadbw(a: u8x8, b: u8x8) -> __m64 {
|
||||
mem::transmute(_mm_sad_pu8(a, b))
|
||||
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sad_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Converts two elements of a 64-bit vector of [2 x i32] into two
|
||||
|
|
@ -254,7 +254,7 @@ pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 {
|
|||
/// into a 128-bit vector of [4 x float].
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 {
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a);
|
||||
let b = mmx::_mm_unpacklo_pi8(a, b);
|
||||
|
|
@ -265,9 +265,9 @@ pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 {
|
|||
/// vector of [8 x u8] into a 128-bit vector of [4 x float].
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: u8x8) -> f32x4 {
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
|
||||
let b = mmx::_mm_setzero_si64();
|
||||
let b = mmx::_mm_unpacklo_pi8(a.as_i8x8(), mem::transmute(b));
|
||||
let b = mmx::_mm_unpacklo_pi8(a, mem::transmute(b));
|
||||
_mm_cvtpi16_ps(mem::transmute(b))
|
||||
}
|
||||
|
||||
|
|
@ -293,8 +293,8 @@ pub unsafe fn _mm_cvtpi32x2_ps(a: i32x2, b: i32x2) -> f32x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
|
||||
maskmovq(mem::transmute(a), mem::transmute(mask), mem_addr)
|
||||
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
maskmovq(a, mask, mem_addr)
|
||||
}
|
||||
|
||||
/// Conditionally copies the values from each 8-bit element in the first
|
||||
|
|
@ -307,7 +307,7 @@ pub unsafe fn _mm_maskmove_si64(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+sse"]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _m_maskmovq(a: i8x8, mask: i8x8, mem_addr: *mut i8) {
|
||||
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
_mm_maskmove_si64(a, mask, mem_addr)
|
||||
}
|
||||
|
||||
|
|
@ -482,8 +482,8 @@ mod tests {
|
|||
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = u8x8::new(5, 6, 7, 8, 5, 6, 7, 8);
|
||||
|
||||
assert_eq!(r, sse::_mm_max_pu8(a, b));
|
||||
assert_eq!(r, sse::_m_pmaxub(a, b));
|
||||
assert_eq!(r, u8x8::from(sse::_mm_max_pu8(a.into(), b.into())));
|
||||
assert_eq!(r, u8x8::from(sse::_m_pmaxub(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
|
|
@ -502,8 +502,8 @@ mod tests {
|
|||
let b = u8x8::new(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = u8x8::new(2, 2, 3, 4, 2, 2, 3, 4);
|
||||
|
||||
assert_eq!(r, sse::_mm_min_pu8(a, b));
|
||||
assert_eq!(r, sse::_m_pminub(a, b));
|
||||
assert_eq!(r, u8x8::from(sse::_mm_min_pu8(a.into(), b.into())));
|
||||
assert_eq!(r, u8x8::from(sse::_m_pminub(a.into(), b.into())));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
|
|
@ -516,10 +516,10 @@ mod tests {
|
|||
#[simd_test = "sse"]
|
||||
unsafe fn _mm_avg_pu8() {
|
||||
let (a, b) = (u8x8::splat(3), u8x8::splat(9));
|
||||
let r = sse::_mm_avg_pu8(a, b);
|
||||
let r = u8x8::from(sse::_mm_avg_pu8(a.into(), b.into()));
|
||||
assert_eq!(r, u8x8::splat(6));
|
||||
|
||||
let r = sse::_m_pavgb(a, b);
|
||||
let r = u8x8::from(sse::_m_pavgb(a.into(), b.into()));
|
||||
assert_eq!(r, u8x8::splat(6));
|
||||
}
|
||||
|
||||
|
|
@ -538,10 +538,10 @@ mod tests {
|
|||
unsafe fn _mm_sad_pu8() {
|
||||
let a = u8x8::new(255, 254, 253, 252, 1, 2, 3, 4);
|
||||
let b = u8x8::new(0, 0, 0, 0, 2, 1, 2, 1);
|
||||
let r = sse::_mm_sad_pu8(a, b);
|
||||
let r = sse::_mm_sad_pu8(a.into(), b.into());
|
||||
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
|
||||
|
||||
let r = sse::_m_psadbw(a, b);
|
||||
let r = sse::_m_psadbw(a.into(), b.into());
|
||||
assert_eq!(r, mem::transmute(u16x4::new(1020, 0, 0, 0)));
|
||||
}
|
||||
|
||||
|
|
@ -577,7 +577,7 @@ mod tests {
|
|||
unsafe fn _mm_cvtpi8_ps() {
|
||||
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = f32x4::new(1., 2., 3., 4.);
|
||||
let r = sse::_mm_cvtpi8_ps(a);
|
||||
let r = sse::_mm_cvtpi8_ps(a.into());
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -585,7 +585,7 @@ mod tests {
|
|||
unsafe fn _mm_cvtpu8_ps() {
|
||||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = f32x4::new(1., 2., 3., 4.);
|
||||
let r = sse::_mm_cvtpu8_ps(a);
|
||||
let r = sse::_mm_cvtpu8_ps(a.into());
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -603,11 +603,11 @@ mod tests {
|
|||
let a = i8x8::splat(9);
|
||||
let mask = i8x8::splat(0).replace(2, 0x80u8 as i8);
|
||||
let mut r = i8x8::splat(0);
|
||||
sse::_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
|
||||
sse::_mm_maskmove_si64(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
|
||||
assert_eq!(r, i8x8::splat(0).replace(2, 9));
|
||||
|
||||
let mut r = i8x8::splat(0);
|
||||
sse::_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
|
||||
sse::_m_maskmovq(a.into(), mask.into(), &mut r as *mut _ as *mut i8);
|
||||
assert_eq!(r, i8x8::splat(0).replace(2, 9));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ extern "C" {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestz(a.into(), mask.into())
|
||||
ptestz(i64x2::from(a), i64x2::from(mask))
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
|
|
@ -52,7 +52,7 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestc(a.into(), mask.into())
|
||||
ptestc(i64x2::from(a), i64x2::from(mask))
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are
|
||||
|
|
@ -72,7 +72,7 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[target_feature = "+sse4.1"]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestnzc(a.into(), mask.into())
|
||||
ptestnzc(i64x2::from(a), i64x2::from(mask))
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are all
|
||||
|
|
@ -111,7 +111,8 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
|
|||
#[cfg_attr(test, assert_instr(pcmpeqd))]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
|
||||
_mm_testc_si128(a, ::x86::_mm_cmpeq_epi32(a.into(), a.into()).into())
|
||||
let b = i32x4::from(a);
|
||||
_mm_testc_si128(a, __m128i::from(::x86::_mm_cmpeq_epi32(b, b)))
|
||||
}
|
||||
|
||||
/// Tests whether the specified bits in a 128-bit integer vector are
|
||||
|
|
|
|||
|
|
@ -11,8 +11,8 @@ use v64::*;
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 {
|
||||
mem::transmute(pabsb(mem::transmute(a)))
|
||||
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
|
||||
pabsb(a)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed 8-bit integers in `a`, and return the
|
||||
|
|
@ -20,8 +20,8 @@ pub unsafe fn _mm_abs_pi8(a: i8x8) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
pub unsafe fn _mm_abs_pi16(a: i16x4) -> u16x4 {
|
||||
mem::transmute(pabsw(mem::transmute(a)))
|
||||
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
|
||||
pabsw(a)
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed 32-bit integers in `a`, and return the
|
||||
|
|
@ -38,8 +38,8 @@ pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 {
|
||||
mem::transmute(pshufb(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
pshufb(a, b)
|
||||
}
|
||||
|
||||
/// Concatenates the two 64-bit integer vector operands, and right-shifts
|
||||
|
|
@ -47,10 +47,10 @@ pub unsafe fn _mm_shuffle_pi8(a: u8x8, b: u8x8) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 {
|
||||
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
mem::transmute(palignrb(mem::transmute(a), mem::transmute(b), $imm8))
|
||||
palignrb(a, b, $imm8)
|
||||
}
|
||||
}
|
||||
constify_imm8!(n, call)
|
||||
|
|
@ -61,8 +61,8 @@ pub unsafe fn _mm_alignr_pi8(a: u8x8, b: u8x8, n: i32) -> u8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
pub unsafe fn _mm_hadd_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(phaddw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
|
|
@ -80,8 +80,8 @@ pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(phaddsw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddsw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
|
|
@ -89,8 +89,8 @@ pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
pub unsafe fn _mm_hsub_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(phsubw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
|
|
@ -109,8 +109,8 @@ pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubsw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
|
|
@ -121,8 +121,8 @@ pub unsafe fn _mm_hsubs_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 {
|
||||
mem::transmute(pmaddubsw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmaddubsw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
|
|
@ -131,8 +131,8 @@ pub unsafe fn _mm_maddubs_pi16(a: u8x8, b: i8x8) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(pmulhrsw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmulhrsw(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
|
||||
|
|
@ -142,8 +142,8 @@ pub unsafe fn _mm_mulhrs_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 {
|
||||
mem::transmute(psignb(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psignb(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
|
||||
|
|
@ -153,8 +153,8 @@ pub unsafe fn _mm_sign_pi8(a: i8x8, b: i8x8) -> i8x8 {
|
|||
#[inline(always)]
|
||||
#[target_feature = "+ssse3"]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
pub unsafe fn _mm_sign_pi16(a: i16x4, b: i16x4) -> i16x4 {
|
||||
mem::transmute(psignw(mem::transmute(a), mem::transmute(b)))
|
||||
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psignw(a, b)
|
||||
}
|
||||
|
||||
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
|
||||
|
|
@ -228,13 +228,13 @@ mod tests {
|
|||
|
||||
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_abs_pi8() {
|
||||
let r = ssse3::_mm_abs_pi8(i8x8::splat(-5));
|
||||
let r = u8x8::from(ssse3::_mm_abs_pi8(i8x8::splat(-5).into()));
|
||||
assert_eq!(r, u8x8::splat(5));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
unsafe fn _mm_abs_pi16() {
|
||||
let r = ssse3::_mm_abs_pi16(i16x4::splat(-5));
|
||||
let r = u16x4::from(ssse3::_mm_abs_pi16(i16x4::splat(-5).into()));
|
||||
assert_eq!(r, u16x4::splat(5));
|
||||
}
|
||||
|
||||
|
|
@ -249,7 +249,7 @@ mod tests {
|
|||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = u8x8::new(4, 128, 4, 3, 24, 12, 6, 19);
|
||||
let expected = u8x8::new(5, 0, 5, 4, 1, 5, 7, 4);
|
||||
let r = ssse3::_mm_shuffle_pi8(a, b);
|
||||
let r = u8x8::from(ssse3::_mm_shuffle_pi8(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -257,7 +257,7 @@ mod tests {
|
|||
unsafe fn _mm_alignr_pi8() {
|
||||
let a = u32x2::new(0x89ABCDEF_u32, 0x01234567_u32);
|
||||
let b = u32x2::new(0xBBAA9988_u32, 0xFFDDEECC_u32);
|
||||
let r = ssse3::_mm_alignr_pi8(u8x8::from(a), u8x8::from(b), 4);
|
||||
let r = ssse3::_mm_alignr_pi8(u8x8::from(a).into(), u8x8::from(b).into(), 4);
|
||||
assert_eq!(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
|
||||
}
|
||||
|
||||
|
|
@ -266,7 +266,7 @@ mod tests {
|
|||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let b = i16x4::new(4, 128, 4, 3);
|
||||
let expected = i16x4::new(3, 7, 132, 7);
|
||||
let r = ssse3::_mm_hadd_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_hadd_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +284,7 @@ mod tests {
|
|||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let b = i16x4::new(32767, 1, -32768, -1);
|
||||
let expected = i16x4::new(3, 7, 32767, -32768);
|
||||
let r = ssse3::_mm_hadds_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_hadds_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -293,7 +293,7 @@ mod tests {
|
|||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let b = i16x4::new(4, 128, 4, 3);
|
||||
let expected = i16x4::new(-1, -1, -124, 1);
|
||||
let r = ssse3::_mm_hsub_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_hsub_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -311,7 +311,7 @@ mod tests {
|
|||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let b = i16x4::new(4, 128, 4, 3);
|
||||
let expected = i16x4::new(-1, -1, -124, 1);
|
||||
let r = ssse3::_mm_hsubs_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_hsubs_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -320,7 +320,7 @@ mod tests {
|
|||
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = i8x8::new(4, 63, 4, 3, 24, 12, 6, 19);
|
||||
let expected = i16x4::new(130, 24, 192, 194);
|
||||
let r = ssse3::_mm_maddubs_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_maddubs_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -329,7 +329,7 @@ mod tests {
|
|||
let a = i16x4::new(1, 2, 3, 4);
|
||||
let b = i16x4::new(4, 32767, -1, -32768);
|
||||
let expected = i16x4::new(0, 2, 0, -4);
|
||||
let r = ssse3::_mm_mulhrs_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_mulhrs_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -338,7 +338,7 @@ mod tests {
|
|||
let a = i8x8::new(1, 2, 3, 4, -5, -6, 7, 8);
|
||||
let b = i8x8::new(4, 64, 0, 3, 1, -1, -2, 1);
|
||||
let expected = i8x8::new(1, 2, 0, 4, -5, 6, -7, 8);
|
||||
let r = ssse3::_mm_sign_pi8(a, b);
|
||||
let r = i8x8::from(ssse3::_mm_sign_pi8(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
@ -347,7 +347,7 @@ mod tests {
|
|||
let a = i16x4::new(-1, 2, 3, 4);
|
||||
let b = i16x4::new(1, -1, 1, 0);
|
||||
let expected = i16x4::new(-1, -2, 3, 0);
|
||||
let r = ssse3::_mm_sign_pi16(a, b);
|
||||
let r = i16x4::from(ssse3::_mm_sign_pi16(a.into(), b.into()));
|
||||
assert_eq!(r, expected);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -293,9 +293,41 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
|
|||
}
|
||||
}
|
||||
|
||||
let probably_only_one_instruction = function.instrs.len() < 30;
|
||||
// Look for `call` instructions in the disassembly to detect whether
|
||||
// inlining failed: all intrinsics are `#[inline(always)]`, so
|
||||
// calling one intrinsic from another should not generate `call`
|
||||
// instructions.
|
||||
let mut inlining_failed = false;
|
||||
for (i, instr) in function.instrs.iter().enumerate() {
|
||||
let part = match instr.parts.get(0) {
|
||||
Some(part) => part,
|
||||
None => continue,
|
||||
};
|
||||
if !part.contains("call") {
|
||||
continue
|
||||
}
|
||||
|
||||
if found && probably_only_one_instruction {
|
||||
// On 32-bit x86 position independent code will call itself and be
|
||||
// immediately followed by a `pop` to learn about the current address.
|
||||
// Let's not take that into account when considering whether a function
|
||||
// failed inlining something.
|
||||
let followed_by_pop = function.instrs.get(i + 1)
|
||||
.and_then(|i| i.parts.get(0))
|
||||
.map(|s| s.contains("pop"))
|
||||
.unwrap_or(false);
|
||||
if followed_by_pop && cfg!(target_arch = "x86") {
|
||||
continue
|
||||
}
|
||||
|
||||
inlining_failed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
let instruction_limit = 30;
|
||||
let probably_only_one_instruction =
|
||||
function.instrs.len() < instruction_limit;
|
||||
|
||||
if found && probably_only_one_instruction && !inlining_failed {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
@ -319,7 +351,12 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
|
|||
expected
|
||||
);
|
||||
} else if !probably_only_one_instruction {
|
||||
panic!("too many instructions in the disassembly");
|
||||
panic!("instruction found, but the disassembly contains too many \
|
||||
instructions: #instructions = {} >= {} (limit)",
|
||||
function.instrs.len(), instruction_limit);
|
||||
} else if inlining_failed {
|
||||
panic!("instruction found, but the disassembly contains `call` \
|
||||
instructions, which hint that inlining failed");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue