Lower the instruction limit to 20 (#262)

Right now it's 30, which is a bit high; most of the intrinsics that required
this many instructions ended up needing to be fixed anyway.
This commit is contained in:
Alex Crichton 2018-01-03 17:21:01 -06:00 committed by GitHub
parent 07ebce51b8
commit edbfae36c0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 82 deletions

View file

@ -32,8 +32,8 @@ pub unsafe fn _mm_setzero_si64() -> __m64 {
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
mem::transmute(packsswb(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
packsswb(a, b)
}
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
@ -44,8 +44,8 @@ pub unsafe fn _mm_packs_pi16(a: i16x4, b: i16x4) -> i8x8 {
#[inline(always)]
#[target_feature = "+mmx,+sse"]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
packssdw(a, b)
}
/// Compares the 8-bit integer elements of two 64-bit integer vectors of
@ -68,8 +68,8 @@ pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(pcmpgtw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
pcmpgtw(a, b)
}
/// Unpacks the upper 32 bits from two 64-bit integer vectors of
@ -77,8 +77,8 @@ pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(punpckhwd(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
punpckhwd(a, b)
}
/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
@ -95,8 +95,8 @@ pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(punpcklwd(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
punpcklwd(a, b)
}
#[allow(improper_ctypes)]
@ -134,7 +134,7 @@ mod tests {
let a = i16x4::new(-1, 2, -3, 4);
let b = i16x4::new(-5, 6, -7, 8);
let r = i8x8::new(-1, 2, -3, 4, -5, 6, -7, 8);
assert_eq!(r, mmx::_mm_packs_pi16(a, b));
assert_eq!(r, i8x8::from(mmx::_mm_packs_pi16(a.into(), b.into())));
}
#[simd_test = "sse"] // FIXME: should be mmx
@ -142,7 +142,7 @@ mod tests {
let a = i32x2::new(-1, 2);
let b = i32x2::new(-5, 6);
let r = i16x4::new(-1, 2, -5, 6);
assert_eq!(r, mmx::_mm_packs_pi32(a, b));
assert_eq!(r, i16x4::from(mmx::_mm_packs_pi32(a.into(), b.into())));
}
#[simd_test = "mmx"]
@ -158,7 +158,7 @@ mod tests {
let a = i16x4::new(0, 1, 2, 3);
let b = i16x4::new(4, 3, 2, 1);
let r = i16x4::new(0, 0, 0, -1);
assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b));
assert_eq!(r, i16x4::from(mmx::_mm_cmpgt_pi16(a.into(), b.into())));
}
#[simd_test = "mmx"]
@ -166,7 +166,7 @@ mod tests {
let a = i16x4::new(0, 1, 2, 3);
let b = i16x4::new(4, 5, 6, 7);
let r = i16x4::new(2, 6, 3, 7);
assert_eq!(r, mmx::_mm_unpackhi_pi16(a, b));
assert_eq!(r, i16x4::from(mmx::_mm_unpackhi_pi16(a.into(), b.into())));
}
#[simd_test = "mmx"]
@ -182,6 +182,6 @@ mod tests {
let a = i16x4::new(0, 1, 2, 3);
let b = i16x4::new(4, 5, 6, 7);
let r = i16x4::new(0, 4, 1, 5);
assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b));
assert_eq!(r, i16x4::from(mmx::_mm_unpacklo_pi16(a.into(), b.into())));
}
}

View file

@ -50,8 +50,8 @@ extern "C" {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(pmaxsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
pmaxsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
@ -59,7 +59,7 @@ pub unsafe fn _mm_max_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: i16x4, b: i16x4) -> i16x4 {
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
_mm_max_pi16(a, b)
}
@ -86,8 +86,8 @@ pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: i16x4, b: i16x4) -> i16x4 {
mem::transmute(pminsw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
pminsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
@ -95,7 +95,7 @@ pub unsafe fn _mm_min_pi16(a: i16x4, b: i16x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: i16x4, b: i16x4) -> i16x4 {
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
_mm_min_pi16(a, b)
}
@ -123,8 +123,8 @@ pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: u16x4, b: u16x4) -> u16x4 {
mem::transmute(pmulhuw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
pmulhuw(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
@ -133,7 +133,7 @@ pub unsafe fn _mm_mulhi_pu16(a: u16x4, b: u16x4) -> u16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: u16x4, b: u16x4) -> u16x4 {
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
_mm_mulhi_pu16(a, b)
}
@ -163,8 +163,8 @@ pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: u16x4, b: u16x4) -> u16x4 {
mem::transmute(pavgw(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
pavgw(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
@ -173,7 +173,7 @@ pub unsafe fn _mm_avg_pu16(a: u16x4, b: u16x4) -> u16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: u16x4, b: u16x4) -> u16x4 {
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu16(a, b)
}
@ -225,7 +225,7 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
/// float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 {
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a);
let c = mmx::_mm_unpackhi_pi16(a, b);
@ -240,14 +240,14 @@ pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 {
/// 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 {
let b = mem::transmute(mmx::_mm_setzero_si64());
let c = mmx::_mm_unpackhi_pi16(a.as_i16x4(), b);
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let c = mmx::_mm_unpackhi_pi16(a, b);
let r = i586::_mm_setzero_ps();
let r = cvtpi2ps(r, mem::transmute(c));
let r = cvtpi2ps(r, c);
let r = i586::_mm_movelh_ps(r, r);
let c = mmx::_mm_unpacklo_pi16(a.as_i16x4(), b);
cvtpi2ps(r, mem::transmute(c))
let c = mmx::_mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8]
@ -256,9 +256,9 @@ pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 {
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a);
let b = mmx::_mm_cmpgt_pi8(b, a);
let b = mmx::_mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(mem::transmute(b))
_mm_cvtpi16_ps(b)
}
/// Converts the lower four unsigned 8-bit integer values from a 64-bit
@ -267,8 +267,8 @@ pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> f32x4 {
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> f32x4 {
let b = mmx::_mm_setzero_si64();
let b = mmx::_mm_unpacklo_pi8(a, mem::transmute(b));
_mm_cvtpi16_ps(mem::transmute(b))
let b = mmx::_mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts the two 32-bit signed integer values from each 64-bit vector
@ -338,9 +338,9 @@ pub unsafe fn _m_pextrw(a: i16x4, imm2: i32) -> i16 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 {
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => { mem::transmute(pinsrw(mem::transmute(a), d, $imm2)) }
($imm2:expr) => { pinsrw(a, d, $imm2) }
}
constify_imm2!(imm2, call)
}
@ -351,7 +351,7 @@ pub unsafe fn _mm_insert_pi16(a: i16x4, d: i32, imm2: i32) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
pub unsafe fn _m_pinsrw(a: i16x4, d: i32, imm2: i32) -> i16x4 {
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
_mm_insert_pi16(a, d, imm2)
}
@ -380,9 +380,9 @@ pub unsafe fn _m_pmovmskb(a: i16x4) -> i32 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i32) -> i16x4 {
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => { mem::transmute(pshufw(mem::transmute(a), $imm8)) }
($imm8:expr) => { pshufw(a, $imm8) }
}
constify_imm8!(imm8, call)
}
@ -392,7 +392,7 @@ pub unsafe fn _mm_shuffle_pi16(a: i16x4, imm8: i32) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
pub unsafe fn _m_pshufw(a: i16x4, imm8: i32) -> i16x4 {
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
_mm_shuffle_pi16(a, imm8)
}
@ -419,8 +419,8 @@ pub unsafe fn _mm_cvtt_ps2pi(a: f32x4) -> i32x2 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
mem::transmute(cvtps2pi(a))
pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> __m64 {
cvtps2pi(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
@ -428,7 +428,7 @@ pub unsafe fn _mm_cvtps_pi32(a: f32x4) -> i32x2 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> __m64 {
_mm_cvtps_pi32(a)
}
@ -437,7 +437,7 @@ pub unsafe fn _mm_cvt_ps2pi(a: f32x4) -> i32x2 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> __m64 {
let b = _mm_cvtps_pi32(a);
let a = i586::_mm_movehl_ps(a, a);
let c = _mm_cvtps_pi32(a);
@ -450,10 +450,10 @@ pub unsafe fn _mm_cvtps_pi16(a: f32x4) -> i16x4 {
#[inline(always)]
#[target_feature = "+sse"]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> i8x8 {
pub unsafe fn _mm_cvtps_pi8(a: f32x4) -> __m64 {
let b = _mm_cvtps_pi16(a);
let c = mmx::_mm_setzero_si64();
mmx::_mm_packs_pi16(b, mem::transmute(c))
mmx::_mm_packs_pi16(b, c)
}
#[cfg(test)]
@ -472,8 +472,8 @@ mod tests {
let b = i16x4::new(5, -2, 7, -4);
let r = i16x4::new(5, 6, 7, 8);
assert_eq!(r, sse::_mm_max_pi16(a, b));
assert_eq!(r, sse::_m_pmaxsw(a, b));
assert_eq!(r, i16x4::from(sse::_mm_max_pi16(a.into(), b.into())));
assert_eq!(r, i16x4::from(sse::_m_pmaxsw(a.into(), b.into())));
}
#[simd_test = "sse"]
@ -492,8 +492,8 @@ mod tests {
let b = i16x4::new(5, -2, 7, -4);
let r = i16x4::new(-1, -2, -3, -4);
assert_eq!(r, sse::_mm_min_pi16(a, b));
assert_eq!(r, sse::_m_pminsw(a, b));
assert_eq!(r, i16x4::from(sse::_mm_min_pi16(a.into(), b.into())));
assert_eq!(r, i16x4::from(sse::_m_pminsw(a.into(), b.into())));
}
#[simd_test = "sse"]
@ -509,7 +509,7 @@ mod tests {
#[simd_test = "sse"]
unsafe fn _mm_mulhi_pu16() {
let (a, b) = (u16x4::splat(1000), u16x4::splat(1001));
let r = sse::_mm_mulhi_pu16(a, b);
let r = u16x4::from(sse::_mm_mulhi_pu16(a.into(), b.into()));
assert_eq!(r, u16x4::splat(15));
}
@ -526,10 +526,10 @@ mod tests {
#[simd_test = "sse"]
unsafe fn _mm_avg_pu16() {
let (a, b) = (u16x4::splat(3), u16x4::splat(9));
let r = sse::_mm_avg_pu16(a, b);
let r = u16x4::from(sse::_mm_avg_pu16(a.into(), b.into()));
assert_eq!(r, u16x4::splat(6));
let r = sse::_m_pavgw(a, b);
let r = u16x4::from(sse::_m_pavgw(a.into(), b.into()));
assert_eq!(r, u16x4::splat(6));
}
@ -561,7 +561,7 @@ mod tests {
unsafe fn _mm_cvtpi16_ps() {
let a = i16x4::new(1, 2, 3, 4);
let expected = f32x4::new(1., 2., 3., 4.);
let r = sse::_mm_cvtpi16_ps(a);
let r = sse::_mm_cvtpi16_ps(a.into());
assert_eq!(r, expected);
}
@ -569,7 +569,7 @@ mod tests {
unsafe fn _mm_cvtpu16_ps() {
let a = u16x4::new(1, 2, 3, 4);
let expected = f32x4::new(1., 2., 3., 4.);
let r = sse::_mm_cvtpu16_ps(a);
let r = sse::_mm_cvtpu16_ps(a.into());
assert_eq!(r, expected);
}
@ -626,14 +626,14 @@ mod tests {
#[simd_test = "sse"]
unsafe fn _mm_insert_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let r = sse::_mm_insert_pi16(a, 0, 0b0);
let r = i16x4::from(sse::_mm_insert_pi16(a.into(), 0, 0b0));
let expected = i16x4::new(0, 2, 3, 4);
assert_eq!(r, expected);
let r = sse::_mm_insert_pi16(a, 0, 0b10);
let r = i16x4::from(sse::_mm_insert_pi16(a.into(), 0, 0b10));
let expected = i16x4::new(1, 2, 0, 4);
assert_eq!(r, expected);
let r = sse::_m_pinsrw(a, 0, 0b10);
let r = i16x4::from(sse::_m_pinsrw(a.into(), 0, 0b10));
assert_eq!(r, expected);
}
@ -650,11 +650,11 @@ mod tests {
#[simd_test = "sse"]
unsafe fn _mm_shuffle_pi16() {
let a = i16x4::new(1, 2, 3, 4);
let r = sse::_mm_shuffle_pi16(a, 0b00_01_01_11);
let r = i16x4::from(sse::_mm_shuffle_pi16(a.into(), 0b00_01_01_11));
let expected = i16x4::new(4, 2, 2, 1);
assert_eq!(r, expected);
let r = sse::_m_pshufw(a, 0b00_01_01_11);
let r = i16x4::from(sse::_m_pshufw(a.into(), 0b00_01_01_11));
assert_eq!(r, expected);
}
@ -663,8 +663,8 @@ mod tests {
let a = f32x4::new(1.0, 2.0, 3.0, 4.0);
let r = i32x2::new(1, 2);
assert_eq!(r, sse::_mm_cvtps_pi32(a));
assert_eq!(r, sse::_mm_cvt_ps2pi(a));
assert_eq!(r, i32x2::from(sse::_mm_cvtps_pi32(a)));
assert_eq!(r, i32x2::from(sse::_mm_cvt_ps2pi(a)));
}
#[simd_test = "sse"]
@ -680,13 +680,13 @@ mod tests {
unsafe fn _mm_cvtps_pi16() {
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
let r = i16x4::new(7, 2, 3, 4);
assert_eq!(r, sse::_mm_cvtps_pi16(a));
assert_eq!(r, i16x4::from(sse::_mm_cvtps_pi16(a)));
}
#[simd_test = "sse"]
unsafe fn _mm_cvtps_pi8() {
let a = f32x4::new(7.0, 2.0, 3.0, 4.0);
let r = i8x8::new(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq!(r, sse::_mm_cvtps_pi8(a));
assert_eq!(r, i8x8::from(sse::_mm_cvtps_pi8(a)));
}
}

View file

@ -3,7 +3,6 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use core::mem;
use v64::*;
/// Compute the absolute value of packed 8-bit integers in `a` and
@ -29,8 +28,8 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: i32x2) -> u32x2 {
mem::transmute(pabsd(mem::transmute(a)))
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
pabsd(a)
}
/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
@ -70,8 +69,8 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(phaddd(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
phaddd(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
@ -98,8 +97,8 @@ pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(phsubd(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
phsubd(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
@ -164,8 +163,8 @@ pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
#[inline(always)]
#[target_feature = "+ssse3"]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: i32x2, b: i32x2) -> i32x2 {
mem::transmute(psignd(mem::transmute(a), mem::transmute(b)))
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
psignd(a, b)
}
#[allow(improper_ctypes)]
@ -240,7 +239,7 @@ mod tests {
#[simd_test = "ssse3"]
unsafe fn _mm_abs_pi32() {
let r = ssse3::_mm_abs_pi32(i32x2::splat(-5));
let r = u32x2::from(ssse3::_mm_abs_pi32(i32x2::splat(-5).into()));
assert_eq!(r, u32x2::splat(5));
}
@ -275,7 +274,7 @@ mod tests {
let a = i32x2::new(1, 2);
let b = i32x2::new(4, 128);
let expected = i32x2::new(3, 132);
let r = ssse3::_mm_hadd_pi32(a, b);
let r = i32x2::from(ssse3::_mm_hadd_pi32(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -302,7 +301,7 @@ mod tests {
let a = i32x2::new(1, 2);
let b = i32x2::new(4, 128);
let expected = i32x2::new(-1, -124);
let r = ssse3::_mm_hsub_pi32(a, b);
let r = i32x2::from(ssse3::_mm_hsub_pi32(a.into(), b.into()));
assert_eq!(r, expected);
}
@ -356,7 +355,7 @@ mod tests {
let a = i32x2::new(-1, 2);
let b = i32x2::new(1, 0);
let expected = i32x2::new(-1, 0);
let r = ssse3::_mm_sign_pi32(a, b);
let r = i32x2::from(ssse3::_mm_sign_pi32(a.into(), b.into()));
assert_eq!(r, expected);
}
}

View file

@ -323,7 +323,12 @@ pub fn assert(fnptr: usize, fnname: &str, expected: &str) {
break;
}
let instruction_limit = 30;
let instruction_limit = match expected {
// cpuid returns a pretty big aggregate structure so exempt it from the
// slightly more restrictive 20 instructions below
"cpuid" => 30,
_ => 20,
};
let probably_only_one_instruction =
function.instrs.len() < instruction_limit;