Don't distinguish between i586/i686 (#301)
This was historically done as the contents of the `i686` module wouldn't actually compile on i586 for various reasons. I believe I've tracked this down to #300 where LLVM refuses to compile a function using the `x86_mmx` type without actually enabling the `mmx` feature (sort of reasonably so!). This commit will now compile in both the `i586` and `i686` modules of this crate into the `i586-unknown-linux-gnu` target, and the relevant functions now also enable the `mmx` feature if they're using the `__m64` type. I believe this is uncovering a more widespread problem where the `__m64` isn't usable outside the context of `mmx`-enabled functions. The i686 and x86_64 targets have this feature enabled by default which is why it's worked there, but they're not enabled for the i586 target. We'll probably want to consider this when stabilizing!
This commit is contained in:
parent
e0aed0fffc
commit
aefb22c51e
5 changed files with 157 additions and 161 deletions
|
|
@ -15,7 +15,7 @@
|
|||
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
|
||||
const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
|
||||
crate_in_paths)]
|
||||
#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))]
|
||||
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
|
||||
#![cfg_attr(feature = "cargo-clippy",
|
||||
allow(inline_always, too_many_arguments, cast_sign_loss,
|
||||
cast_lossless, cast_possible_wrap,
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@ extern "C" {
|
|||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsw))]
|
||||
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmaxsw(a, b)
|
||||
|
|
@ -53,8 +53,8 @@ pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsw))]
|
||||
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_max_pi16(a, b)
|
||||
|
|
@ -62,8 +62,8 @@ pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pmaxub(a, b)
|
||||
|
|
@ -71,8 +71,8 @@ pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_max_pu8(a, b)
|
||||
|
|
@ -80,8 +80,8 @@ pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminsw))]
|
||||
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pminsw(a, b)
|
||||
|
|
@ -89,8 +89,8 @@ pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminsw))]
|
||||
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_min_pi16(a, b)
|
||||
|
|
@ -98,8 +98,8 @@ pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pminub(a, b)
|
||||
|
|
@ -107,8 +107,8 @@ pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
|
|||
|
||||
/// Compares the packed 8-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_min_pu8(a, b)
|
||||
|
|
@ -117,8 +117,8 @@ pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
|
|||
/// Multiplies packed 16-bit unsigned integer values and writes the
|
||||
/// high-order 16 bits of each 32-bit product to the corresponding bits in
|
||||
/// the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhuw))]
|
||||
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
pmulhuw(a, b)
|
||||
|
|
@ -127,8 +127,8 @@ pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Multiplies packed 16-bit unsigned integer values and writes the
|
||||
/// high-order 16 bits of each 32-bit product to the corresponding bits in
|
||||
/// the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhuw))]
|
||||
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_mulhi_pu16(a, b)
|
||||
|
|
@ -137,8 +137,8 @@ pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
|
|||
/// Computes the rounded averages of the packed unsigned 8-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pavgb(a, b)
|
||||
|
|
@ -147,8 +147,8 @@ pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
|
|||
/// Computes the rounded averages of the packed unsigned 8-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_avg_pu8(a, b)
|
||||
|
|
@ -157,8 +157,8 @@ pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
|
|||
/// Computes the rounded averages of the packed unsigned 16-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgw))]
|
||||
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
pavgw(a, b)
|
||||
|
|
@ -167,8 +167,8 @@ pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Computes the rounded averages of the packed unsigned 16-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgw))]
|
||||
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_avg_pu16(a, b)
|
||||
|
|
@ -178,8 +178,8 @@ pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
|
|||
/// 64-bit vector operands and computes the absolute value for each of the
|
||||
/// difference. Then sum of the 8 absolute differences is written to the
|
||||
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
psadbw(a, b)
|
||||
|
|
@ -189,8 +189,8 @@ pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
|
|||
/// 64-bit vector operands and computes the absolute value for each of the
|
||||
/// difference. Then sum of the 8 absolute differences is written to the
|
||||
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sad_pu8(a, b)
|
||||
|
|
@ -200,8 +200,8 @@ pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
|
|||
/// floating point values and writes them to the lower 64-bits of the
|
||||
/// destination. The remaining higher order elements of the destination are
|
||||
/// copied from the corresponding elements in the first operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
|
||||
cvtpi2ps(a, mem::transmute(b))
|
||||
|
|
@ -211,16 +211,16 @@ pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
|
|||
/// floating point values and writes them to the lower 64-bits of the
|
||||
/// destination. The remaining higher order elements of the destination are
|
||||
/// copied from the corresponding elements in the first operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
|
||||
_mm_cvtpi32_ps(a, b)
|
||||
}
|
||||
|
||||
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
|
|
@ -230,8 +230,8 @@ pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
|
|||
}
|
||||
|
||||
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
|
|
@ -240,8 +240,8 @@ pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
|
|||
}
|
||||
|
||||
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
|
|
@ -255,8 +255,8 @@ pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
|
|||
}
|
||||
|
||||
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
|
|
@ -270,8 +270,8 @@ pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
|
|||
|
||||
/// Converts the two 32-bit signed integer values from each 64-bit vector
|
||||
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
|
||||
let c = i586::_mm_setzero_ps();
|
||||
|
|
@ -287,8 +287,8 @@ pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
|
|||
///
|
||||
/// To minimize caching, the data is flagged as non-temporal
|
||||
/// (unlikely to be used again soon).
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
maskmovq(a, mask, mem_addr)
|
||||
|
|
@ -301,8 +301,8 @@ pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
|||
///
|
||||
/// To minimize caching, the data is flagged as non-temporal
|
||||
/// (unlikely to be used again soon).
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
_mm_maskmove_si64(a, mask, mem_addr)
|
||||
|
|
@ -310,8 +310,8 @@ pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
|||
|
||||
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
|
||||
/// returns it, as specified by the immediate integer operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
|
||||
macro_rules! call {
|
||||
|
|
@ -322,8 +322,8 @@ pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i16 {
|
|||
|
||||
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
|
||||
/// returns it, as specified by the immediate integer operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 {
|
||||
_mm_extract_pi16(a, imm2)
|
||||
|
|
@ -332,8 +332,8 @@ pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i16 {
|
|||
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
|
||||
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
|
||||
/// specified by the immediate operand `n`.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
|
|
@ -345,8 +345,8 @@ pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
|
|||
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
|
||||
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
|
||||
/// specified by the immediate operand `n`.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
_mm_insert_pi16(a, d, imm2)
|
||||
|
|
@ -355,8 +355,8 @@ pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
|
|||
/// Takes the most significant bit from each 8-bit element in a 64-bit
|
||||
/// integer vector to create a 16-bit mask value. Zero-extends the value to
|
||||
/// 32-bit integer and writes it to the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
|
||||
pmovmskb(mem::transmute(a))
|
||||
|
|
@ -365,8 +365,8 @@ pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
|
|||
/// Takes the most significant bit from each 8-bit element in a 64-bit
|
||||
/// integer vector to create a 16-bit mask value. Zero-extends the value to
|
||||
/// 32-bit integer and writes it to the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
|
||||
_mm_movemask_pi8(a)
|
||||
|
|
@ -374,8 +374,8 @@ pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
|
|||
|
||||
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
|
||||
/// destination, as specified by the immediate value operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
|
|
@ -386,8 +386,8 @@ pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
|
|||
|
||||
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
|
||||
/// destination, as specified by the immediate value operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
|
||||
_mm_shuffle_pi16(a, imm8)
|
||||
|
|
@ -395,8 +395,8 @@ pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
|
|||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
|
||||
mem::transmute(cvttps2pi(a))
|
||||
|
|
@ -404,8 +404,8 @@ pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
|
|||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
|
||||
_mm_cvttps_pi32(a)
|
||||
|
|
@ -413,8 +413,8 @@ pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
|
|||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
|
||||
cvtps2pi(a)
|
||||
|
|
@ -422,8 +422,8 @@ pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
|
|||
|
||||
/// Convert the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
|
||||
_mm_cvtps_pi32(a)
|
||||
|
|
@ -431,8 +431,8 @@ pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
|
|||
|
||||
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 16-bit integers.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
|
||||
let b = _mm_cvtps_pi32(a);
|
||||
|
|
@ -444,8 +444,8 @@ pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
|
|||
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 8-bit integers, and returns them in the lower 4 elements of the
|
||||
/// result.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
|
||||
let b = _mm_cvtps_pi16(a);
|
||||
|
|
@ -458,7 +458,7 @@ mod tests {
|
|||
use x86::*;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_max_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
|
|
@ -468,7 +468,7 @@ mod tests {
|
|||
assert_eq_m64(r, _m_pmaxsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_max_pu8() {
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
|
|
@ -478,7 +478,7 @@ mod tests {
|
|||
assert_eq_m64(r, _m_pmaxub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_min_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
|
|
@ -488,7 +488,7 @@ mod tests {
|
|||
assert_eq_m64(r, _m_pminsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_min_pu8() {
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
|
|
@ -498,21 +498,21 @@ mod tests {
|
|||
assert_eq_m64(r, _m_pminub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_mulhi_pu16() {
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _mm_mulhi_pu16(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_m_pmulhuw() {
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _m_pmulhuw(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_avg_pu8() {
|
||||
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
|
||||
let r = _mm_avg_pu8(a, b);
|
||||
|
|
@ -522,7 +522,7 @@ mod tests {
|
|||
assert_eq_m64(r, _mm_set1_pi8(6));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_avg_pu16() {
|
||||
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
|
||||
let r = _mm_avg_pu16(a, b);
|
||||
|
|
@ -532,7 +532,7 @@ mod tests {
|
|||
assert_eq_m64(r, _mm_set1_pi16(6));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_sad_pu8() {
|
||||
let a = _mm_setr_pi8(255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4);
|
||||
|
|
@ -544,7 +544,7 @@ mod tests {
|
|||
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpi32_ps() {
|
||||
let a = _mm_setr_ps(0., 0., 3., 4.);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
|
|
@ -556,7 +556,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpi16_ps() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
|
|
@ -564,7 +564,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpu16_ps() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
|
|
@ -572,7 +572,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpi8_ps() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
|
|
@ -580,7 +580,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpu8_ps() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
|
|
@ -588,7 +588,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtpi32x2_ps() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
|
|
@ -597,7 +597,7 @@ mod tests {
|
|||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_maskmove_si64() {
|
||||
let a = _mm_set1_pi8(9);
|
||||
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
|
||||
|
|
@ -615,7 +615,7 @@ mod tests {
|
|||
assert_eq_m64(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_extract_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_extract_pi16(a, 0);
|
||||
|
|
@ -627,7 +627,7 @@ mod tests {
|
|||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_insert_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_insert_pi16(a, 0, 0b0);
|
||||
|
|
@ -641,7 +641,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_movemask_pi8() {
|
||||
let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
|
||||
let r = _mm_movemask_pi8(a);
|
||||
|
|
@ -651,7 +651,7 @@ mod tests {
|
|||
assert_eq!(r, 0b10001);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_shuffle_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
|
||||
|
|
@ -662,7 +662,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtps_pi32() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi32(1, 2);
|
||||
|
|
@ -671,7 +671,7 @@ mod tests {
|
|||
assert_eq_m64(r, _mm_cvt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvttps_pi32() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi32(7, 2);
|
||||
|
|
@ -680,14 +680,14 @@ mod tests {
|
|||
assert_eq_m64(r, _mm_cvtt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtps_pi16() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi16(7, 2, 3, 4);
|
||||
assert_eq_m64(r, _mm_cvtps_pi16(a));
|
||||
}
|
||||
|
||||
#[simd_test = "sse"]
|
||||
#[simd_test = "sse,mmx"]
|
||||
unsafe fn test_mm_cvtps_pi8() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use stdsimd_test::assert_instr;
|
|||
/// Adds two signed or unsigned 64-bit integer values, returning the
|
||||
/// lower 64 bits of the sum.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddq))]
|
||||
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
|
||||
paddq(a, b)
|
||||
|
|
@ -21,7 +21,7 @@ pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
|
|||
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
|
||||
/// product.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmuludq))]
|
||||
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
|
||||
pmuludq(mem::transmute(a), mem::transmute(b))
|
||||
|
|
@ -30,7 +30,7 @@ pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
|
|||
/// Subtracts signed or unsigned 64-bit integer values and writes the
|
||||
/// difference to the corresponding bits in the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubq))]
|
||||
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
|
||||
psubq(a, b)
|
||||
|
|
@ -40,7 +40,7 @@ pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
|
|||
/// [2 x i32] into two double-precision floating-point values, returned in a
|
||||
/// 128-bit vector of [2 x double].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2pd))]
|
||||
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
|
||||
cvtpi2pd(a)
|
||||
|
|
@ -49,7 +49,7 @@ pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
|
|||
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
|
||||
/// the specified 64-bit integer values.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(e1), mem::transmute(e0))
|
||||
|
|
@ -58,7 +58,7 @@ pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
|
|||
/// Initializes both values in a 128-bit vector of [2 x i64] with the
|
||||
/// specified 64-bit value.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(a), mem::transmute(a))
|
||||
|
|
@ -67,7 +67,7 @@ pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
|
|||
/// Constructs a 128-bit integer vector, initialized in reverse order
|
||||
/// with the specified 64-bit integral values.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
|
||||
|
|
@ -76,7 +76,7 @@ pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
|
|||
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
|
||||
/// integer.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
|
||||
|
|
@ -86,7 +86,7 @@ pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
|
|||
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
|
||||
/// upper bits.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
|
||||
|
|
@ -97,7 +97,7 @@ pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
|
|||
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
|
||||
/// returned in a 64-bit vector of [2 x i32].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpd2pi))]
|
||||
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
|
||||
cvtpd2pi(a)
|
||||
|
|
@ -109,7 +109,7 @@ pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
|
|||
/// If the result of either conversion is inexact, the result is truncated
|
||||
/// (rounded towards zero) regardless of the current MXCSR setting.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttpd2pi))]
|
||||
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
|
||||
cvttpd2pi(a)
|
||||
|
|
@ -139,7 +139,7 @@ mod tests {
|
|||
|
||||
use x86::*;
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_add_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
|
|
@ -148,7 +148,7 @@ mod tests {
|
|||
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_mul_su32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
|
|
@ -157,7 +157,7 @@ mod tests {
|
|||
assert_eq_m64(r, mem::transmute(expected));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_sub_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
|
|
@ -166,7 +166,7 @@ mod tests {
|
|||
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_cvtpi32_pd() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let expected = _mm_setr_pd(1., 2.);
|
||||
|
|
@ -174,33 +174,33 @@ mod tests {
|
|||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_set_epi64() {
|
||||
let r =
|
||||
_mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_set1_epi64() {
|
||||
let r = _mm_set1_epi64(mem::transmute(1i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_setr_epi64() {
|
||||
let r =
|
||||
_mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_movepi64_pi64() {
|
||||
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
|
||||
assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_movpi64_epi64() {
|
||||
let r = _mm_movpi64_epi64(_mm_setr_pi8(
|
||||
5,
|
||||
|
|
@ -215,14 +215,14 @@ mod tests {
|
|||
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_cvtpd_pi32() {
|
||||
let a = _mm_setr_pd(5., 0.);
|
||||
let r = _mm_cvtpd_pi32(a);
|
||||
assert_eq_m64(r, _mm_setr_pi32(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test = "sse2"]
|
||||
#[simd_test = "sse2,mmx"]
|
||||
unsafe fn test_mm_cvttpd_pi32() {
|
||||
use std::{f64, i32};
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ use x86::*;
|
|||
/// Compute the absolute value of packed 8-bit integers in `a` and
|
||||
/// return the unsigned results.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
|
||||
pabsb(a)
|
||||
|
|
@ -17,7 +17,7 @@ pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
|
|||
/// Compute the absolute value of packed 8-bit integers in `a`, and return the
|
||||
/// unsigned results.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
|
||||
pabsw(a)
|
||||
|
|
@ -26,7 +26,7 @@ pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
|
|||
/// Compute the absolute value of packed 32-bit integers in `a`, and return the
|
||||
/// unsigned results.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
|
||||
pabsd(a)
|
||||
|
|
@ -35,7 +35,7 @@ pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
|
|||
/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
|
||||
/// the corresponding 8-bit element of `b`, and return the results
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
pshufb(a, b)
|
||||
|
|
@ -44,7 +44,7 @@ pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
|
|||
/// Concatenates the two 64-bit integer vector operands, and right-shifts
|
||||
/// the result by the number of bytes specified in the immediate operand.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
|
|
@ -58,7 +58,7 @@ pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
|
|||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [4 x i16].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddw(a, b)
|
||||
|
|
@ -67,7 +67,7 @@ pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [2 x i32].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddd))]
|
||||
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
phaddd(a, b)
|
||||
|
|
@ -77,7 +77,7 @@ pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
|
|||
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddsw(a, b)
|
||||
|
|
@ -86,7 +86,7 @@ pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [4 x i16].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubw(a, b)
|
||||
|
|
@ -95,7 +95,7 @@ pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [2 x i32].
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubd))]
|
||||
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
phsubd(a, b)
|
||||
|
|
@ -106,7 +106,7 @@ pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
|
|||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubsw(a, b)
|
||||
|
|
@ -118,7 +118,7 @@ pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
/// the corresponding bits in the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmaddubsw(a, b)
|
||||
|
|
@ -128,7 +128,7 @@ pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// products to the 18 most significant bits by right-shifting, rounds the
|
||||
/// truncated value by adding 1, and writes bits [16:1] to the destination.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmulhrsw(a, b)
|
||||
|
|
@ -139,7 +139,7 @@ pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psignb(a, b)
|
||||
|
|
@ -150,7 +150,7 @@ pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
|
|||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psignw(a, b)
|
||||
|
|
@ -161,7 +161,7 @@ pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
|
|||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline(always)]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignd))]
|
||||
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
psignd(a, b)
|
||||
|
|
@ -224,25 +224,25 @@ mod tests {
|
|||
|
||||
use x86::*;
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_abs_pi8() {
|
||||
let r = _mm_abs_pi8(_mm_set1_pi8(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi8(5));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_abs_pi16() {
|
||||
let r = _mm_abs_pi16(_mm_set1_pi16(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi16(5));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_abs_pi32() {
|
||||
let r = _mm_abs_pi32(_mm_set1_pi32(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi32(5));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_shuffle_pi8() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
|
||||
|
|
@ -251,7 +251,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_alignr_pi8() {
|
||||
let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
|
||||
let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
|
||||
|
|
@ -259,7 +259,7 @@ mod tests {
|
|||
assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hadd_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
|
|
@ -268,7 +268,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hadd_pi32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(4, 128);
|
||||
|
|
@ -277,7 +277,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hadds_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(32767, 1, -32768, -1);
|
||||
|
|
@ -286,7 +286,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hsub_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
|
|
@ -295,7 +295,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hsub_pi32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(4, 128);
|
||||
|
|
@ -304,7 +304,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_hsubs_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
|
|
@ -313,7 +313,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_maddubs_pi16() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
|
||||
|
|
@ -322,7 +322,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_mulhrs_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 32767, -1, -32768);
|
||||
|
|
@ -331,7 +331,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_sign_pi8() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
|
||||
|
|
@ -340,7 +340,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_sign_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(1, -1, 1, 0);
|
||||
|
|
@ -349,7 +349,7 @@ mod tests {
|
|||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test = "ssse3"]
|
||||
#[simd_test = "ssse3,mmx"]
|
||||
unsafe fn test_mm_sign_pi32() {
|
||||
let a = _mm_setr_pi32(-1, 2);
|
||||
let b = _mm_setr_pi32(1, 0);
|
||||
|
|
|
|||
|
|
@ -415,11 +415,7 @@ pub use self::i586::*;
|
|||
//
|
||||
// This module is not available for `i586` targets,
|
||||
// but available for all `i686` targets by default
|
||||
#[cfg(any(all(target_arch = "x86", target_feature = "sse2"),
|
||||
target_arch = "x86_64"))]
|
||||
mod i686;
|
||||
#[cfg(any(all(target_arch = "x86", target_feature = "sse2"),
|
||||
target_arch = "x86_64"))]
|
||||
pub use self::i686::*;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue