Bye bye MMX! (#890)
This commit is contained in:
parent
e947c5c073
commit
c06b820716
8 changed files with 0 additions and 2321 deletions
|
|
@ -21,7 +21,6 @@
|
|||
stdsimd,
|
||||
staged_api,
|
||||
doc_cfg,
|
||||
mmx_target_feature,
|
||||
tbm_target_feature,
|
||||
sse4a_target_feature,
|
||||
arm_target_feature,
|
||||
|
|
|
|||
|
|
@ -1,786 +0,0 @@
|
|||
//! `i586` MMX instruction set.
|
||||
//!
|
||||
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
|
||||
//! header.
|
||||
//!
|
||||
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
|
||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
|
||||
use crate::{
|
||||
core_arch::{simd::*, x86::*},
|
||||
mem::transmute,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
/// Constructs a 64-bit integer vector initialized to zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
// FIXME: this produces a movl instead of xorps on x86
|
||||
// FIXME: this produces a xor instruction instead of xorps on x86_64
|
||||
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
|
||||
pub unsafe fn _mm_setzero_si64() -> __m64 {
|
||||
transmute(0_i64)
|
||||
}
|
||||
|
||||
/// Adds packed 8-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddb))]
|
||||
pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
paddb(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 8-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddb))]
|
||||
pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_add_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 16-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddw))]
|
||||
pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
paddw(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 16-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddw))]
|
||||
pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_add_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 32-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddd))]
|
||||
pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
paddd(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 32-bit integers in `a` and `b`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddd))]
|
||||
pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_add_pi32(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 8-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddsb))]
|
||||
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
paddsb(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 8-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddsb))]
|
||||
pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_adds_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddsw))]
|
||||
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
paddsw(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddsw))]
|
||||
pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_adds_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddusb))]
|
||||
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
paddusb(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddusb))]
|
||||
pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_adds_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddusw))]
|
||||
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
paddusw(a, b)
|
||||
}
|
||||
|
||||
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddusw))]
|
||||
pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_adds_pu16(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubb))]
|
||||
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psubb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubb))]
|
||||
pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubw))]
|
||||
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psubw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubw))]
|
||||
pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubd))]
|
||||
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
psubd(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubd))]
|
||||
pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sub_pi32(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubsb))]
|
||||
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psubsb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubsb))]
|
||||
pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pi8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubsw))]
|
||||
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psubsw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
|
||||
/// using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubsw))]
|
||||
pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
|
||||
/// integers in `a` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubusb))]
|
||||
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
psubusb(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
|
||||
/// integers in `a` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubusb))]
|
||||
pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
|
||||
/// 16-bit integers in `a` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubusw))]
|
||||
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
psubusw(a, b)
|
||||
}
|
||||
|
||||
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
|
||||
/// 16-bit integers in `a` using saturation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubusw))]
|
||||
pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_subs_pu16(a, b)
|
||||
}
|
||||
|
||||
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
|
||||
/// using signed saturation.
|
||||
///
|
||||
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
|
||||
/// less than 0x80 are saturated to 0x80.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(packsswb))]
|
||||
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
packsswb(a, b)
|
||||
}
|
||||
|
||||
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// using signed saturation.
|
||||
///
|
||||
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative values
|
||||
/// less than 0x8000 are saturated to 0x8000.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(packssdw))]
|
||||
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
packssdw(a, b)
|
||||
}
|
||||
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtb))]
|
||||
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
pcmpgtb(a, b)
|
||||
}
|
||||
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtw))]
|
||||
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pcmpgtw(a, b)
|
||||
}
|
||||
|
||||
/// Compares whether each element of `a` is greater than the corresponding
|
||||
/// element of `b` returning `0` for `false` and `-1` for `true`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtd))]
|
||||
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
pcmpgtd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
|
||||
/// them into the result: `[a.2, b.2, a.3, b.3]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
|
||||
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
punpckhwd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
|
||||
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpckhbw))]
|
||||
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
punpckhbw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
|
||||
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpcklbw))]
|
||||
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
punpcklbw(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
|
||||
/// them into the result: `[a.0 b.0 a.1 b.1]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpcklwd))]
|
||||
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
punpcklwd(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
|
||||
/// into the result: `[a.1, b.1]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpckhdq))]
|
||||
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
punpckhdq(a, b)
|
||||
}
|
||||
|
||||
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
|
||||
/// into the result: `[a.0, b.0]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(punpckldq))]
|
||||
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
punpckldq(a, b)
|
||||
}
|
||||
|
||||
/// Sets packed 16-bit integers in dst with the supplied values.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
|
||||
_mm_setr_pi16(e0, e1, e2, e3)
|
||||
}
|
||||
|
||||
/// Sets packed 32-bit integers in dst with the supplied values.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
|
||||
_mm_setr_pi32(e0, e1)
|
||||
}
|
||||
|
||||
/// Sets packed 8-bit integers in dst with the supplied values.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
|
||||
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
|
||||
}
|
||||
|
||||
/// Broadcasts 16-bit integer `a` to all elements of dst.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
|
||||
_mm_setr_pi16(a, a, a, a)
|
||||
}
|
||||
|
||||
/// Broadcasts 32-bit integer `a` to all elements of dst.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
|
||||
_mm_setr_pi32(a, a)
|
||||
}
|
||||
|
||||
/// Broadcasts 8-bit integer `a` to all elements of dst.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
|
||||
_mm_setr_pi8(a, a, a, a, a, a, a, a)
|
||||
}
|
||||
|
||||
/// Sets packed 16-bit integers in dst with the supplied values in reverse
|
||||
/// order.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
|
||||
transmute(i16x4::new(e0, e1, e2, e3))
|
||||
}
|
||||
|
||||
/// Sets packed 32-bit integers in dst with the supplied values in reverse
|
||||
/// order.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
|
||||
transmute(i32x2::new(e0, e1))
|
||||
}
|
||||
|
||||
/// Sets packed 8-bit integers in dst with the supplied values in reverse order.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_setr_pi8(
|
||||
e0: i8,
|
||||
e1: i8,
|
||||
e2: i8,
|
||||
e3: i8,
|
||||
e4: i8,
|
||||
e5: i8,
|
||||
e6: i8,
|
||||
e7: i8,
|
||||
) -> __m64 {
|
||||
transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
|
||||
}
|
||||
|
||||
/// Empty the MMX state, which marks the x87 FPU registers as available for use
|
||||
/// by x87 instructions. This instruction must be used at the end of all MMX
|
||||
/// technology procedures.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(emms))]
|
||||
pub unsafe fn _mm_empty() {
|
||||
emms()
|
||||
}
|
||||
|
||||
/// Empty the MMX state, which marks the x87 FPU registers as available for use
|
||||
/// by x87 instructions. This instruction must be used at the end of all MMX
|
||||
/// technology procedures.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
#[cfg_attr(test, assert_instr(emms))]
|
||||
pub unsafe fn _m_empty() {
|
||||
emms()
|
||||
}
|
||||
|
||||
/// Copies 32-bit integer `a` to the lower element of the return value, and
|
||||
/// zeroes the upper element of the return value.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_cvtsi32_si64(a: i32) -> __m64 {
|
||||
transmute(i32x2::new(a, 0))
|
||||
}
|
||||
|
||||
/// Return the lower 32-bit integer in `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn _mm_cvtsi64_si32(a: __m64) -> i32 {
|
||||
let r: i32x2 = transmute(a);
|
||||
r.0
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.mmx.padd.b"]
|
||||
fn paddb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.padd.w"]
|
||||
fn paddw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.padd.d"]
|
||||
fn paddd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.padds.b"]
|
||||
fn paddsb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.padds.w"]
|
||||
fn paddsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.paddus.b"]
|
||||
fn paddusb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.paddus.w"]
|
||||
fn paddusw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.b"]
|
||||
fn psubb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.w"]
|
||||
fn psubw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.d"]
|
||||
fn psubd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubs.b"]
|
||||
fn psubsb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubs.w"]
|
||||
fn psubsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubus.b"]
|
||||
fn psubusb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psubus.w"]
|
||||
fn psubusw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.packsswb"]
|
||||
fn packsswb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.packssdw"]
|
||||
fn packssdw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pcmpgt.b"]
|
||||
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
|
||||
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
|
||||
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhwd"]
|
||||
fn punpckhwd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpcklwd"]
|
||||
fn punpcklwd(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhbw"]
|
||||
fn punpckhbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpcklbw"]
|
||||
fn punpcklbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckhdq"]
|
||||
fn punpckhdq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.punpckldq"]
|
||||
fn punpckldq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.emms"]
|
||||
fn emms();
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::x86::*;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_setzero_si64() {
|
||||
let r: __m64 = transmute(0_i64);
|
||||
assert_eq_m64(r, _mm_setzero_si64());
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_add_pi8() {
|
||||
let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
|
||||
let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
|
||||
let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
|
||||
assert_eq_m64(e, _mm_add_pi8(a, b));
|
||||
assert_eq_m64(e, _m_paddb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_add_pi16() {
|
||||
let a = _mm_setr_pi16(-1, -1, 1, 1);
|
||||
let b = _mm_setr_pi16(i16::MIN + 1, 30001, -30001, i16::MAX - 1);
|
||||
let e = _mm_setr_pi16(i16::MIN, 30000, -30000, i16::MAX);
|
||||
assert_eq_m64(e, _mm_add_pi16(a, b));
|
||||
assert_eq_m64(e, _m_paddw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_add_pi32() {
|
||||
let a = _mm_setr_pi32(1, -1);
|
||||
let b = _mm_setr_pi32(i32::MAX - 1, i32::MIN + 1);
|
||||
let e = _mm_setr_pi32(i32::MAX, i32::MIN);
|
||||
assert_eq_m64(e, _mm_add_pi32(a, b));
|
||||
assert_eq_m64(e, _m_paddd(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_adds_pi8() {
|
||||
let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
|
||||
let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
|
||||
let e = _mm_setr_pi8(i8::MIN, 0, 0, i8::MAX, -1, -1, 1, 1);
|
||||
assert_eq_m64(e, _mm_adds_pi8(a, b));
|
||||
assert_eq_m64(e, _m_paddsb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_adds_pi16() {
|
||||
let a = _mm_setr_pi16(-32000, 32000, 4, 0);
|
||||
let b = _mm_setr_pi16(-32000, 32000, -5, 1);
|
||||
let e = _mm_setr_pi16(i16::MIN, i16::MAX, -1, 1);
|
||||
assert_eq_m64(e, _mm_adds_pi16(a, b));
|
||||
assert_eq_m64(e, _m_paddsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_adds_pu8() {
|
||||
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
|
||||
let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
|
||||
let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::MAX as i8);
|
||||
assert_eq_m64(e, _mm_adds_pu8(a, b));
|
||||
assert_eq_m64(e, _m_paddusb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_adds_pu16() {
|
||||
let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
|
||||
let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
|
||||
let e = _mm_setr_pi16(0, 11, 22, u16::MAX as i16);
|
||||
assert_eq_m64(e, _mm_adds_pu16(a, b));
|
||||
assert_eq_m64(e, _m_paddusw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_sub_pi8() {
|
||||
let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
|
||||
let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
|
||||
let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
|
||||
assert_eq_m64(e, _mm_sub_pi8(a, b));
|
||||
assert_eq_m64(e, _m_psubb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_sub_pi16() {
|
||||
let a = _mm_setr_pi16(-20000, -20000, 20000, 30000);
|
||||
let b = _mm_setr_pi16(-10000, 10000, -10000, 30000);
|
||||
let e = _mm_setr_pi16(-10000, -30000, 30000, 0);
|
||||
assert_eq_m64(e, _mm_sub_pi16(a, b));
|
||||
assert_eq_m64(e, _m_psubw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_sub_pi32() {
|
||||
let a = _mm_setr_pi32(500_000, -500_000);
|
||||
let b = _mm_setr_pi32(500_000, 500_000);
|
||||
let e = _mm_setr_pi32(0, -1_000_000);
|
||||
assert_eq_m64(e, _mm_sub_pi32(a, b));
|
||||
assert_eq_m64(e, _m_psubd(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_subs_pi8() {
|
||||
let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
|
||||
let b = _mm_setr_pi8(100, -100, i8::MIN, 127, -1, 1, 3, -3);
|
||||
let e = _mm_setr_pi8(i8::MIN, i8::MAX, i8::MAX, -127, 1, -1, -8, 8);
|
||||
assert_eq_m64(e, _mm_subs_pi8(a, b));
|
||||
assert_eq_m64(e, _m_psubsb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_subs_pi16() {
|
||||
let a = _mm_setr_pi16(-20000, 20000, 0, 0);
|
||||
let b = _mm_setr_pi16(20000, -20000, -1, 1);
|
||||
let e = _mm_setr_pi16(i16::MIN, i16::MAX, 1, -1);
|
||||
assert_eq_m64(e, _mm_subs_pi16(a, b));
|
||||
assert_eq_m64(e, _m_psubsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_subs_pu8() {
|
||||
let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
|
||||
let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
|
||||
let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
|
||||
assert_eq_m64(e, _mm_subs_pu8(a, b));
|
||||
assert_eq_m64(e, _m_psubusb(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_subs_pu16() {
|
||||
let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
|
||||
let b = _mm_setr_pi16(20000, 300, 1, 11111);
|
||||
let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
|
||||
assert_eq_m64(e, _mm_subs_pu16(a, b));
|
||||
assert_eq_m64(e, _m_psubusw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_packs_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 2, -3, 4);
|
||||
let b = _mm_setr_pi16(-5, 6, -7, 8);
|
||||
let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
|
||||
assert_eq_m64(r, _mm_packs_pi16(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_packs_pi32() {
|
||||
let a = _mm_setr_pi32(-1, 2);
|
||||
let b = _mm_setr_pi32(-5, 6);
|
||||
let r = _mm_setr_pi16(-1, 2, -5, 6);
|
||||
assert_eq_m64(r, _mm_packs_pi32(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_cmpgt_pi8() {
|
||||
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
|
||||
assert_eq_m64(r, _mm_cmpgt_pi8(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_cmpgt_pi16() {
|
||||
let a = _mm_setr_pi16(0, 1, 2, 3);
|
||||
let b = _mm_setr_pi16(4, 3, 2, 1);
|
||||
let r = _mm_setr_pi16(0, 0, 0, -1);
|
||||
assert_eq_m64(r, _mm_cmpgt_pi16(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_cmpgt_pi32() {
|
||||
let a = _mm_setr_pi32(0, 3);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
let r0 = _mm_setr_pi32(0, -1);
|
||||
let r1 = _mm_setr_pi32(-1, 0);
|
||||
|
||||
assert_eq_m64(r0, _mm_cmpgt_pi32(a, b));
|
||||
assert_eq_m64(r1, _mm_cmpgt_pi32(b, a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpackhi_pi8() {
|
||||
let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
|
||||
let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
|
||||
let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);
|
||||
|
||||
assert_eq_m64(r, _mm_unpackhi_pi8(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpacklo_pi8() {
|
||||
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
|
||||
assert_eq_m64(r, _mm_unpacklo_pi8(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpackhi_pi16() {
|
||||
let a = _mm_setr_pi16(0, 1, 2, 3);
|
||||
let b = _mm_setr_pi16(4, 5, 6, 7);
|
||||
let r = _mm_setr_pi16(2, 6, 3, 7);
|
||||
assert_eq_m64(r, _mm_unpackhi_pi16(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpacklo_pi16() {
|
||||
let a = _mm_setr_pi16(0, 1, 2, 3);
|
||||
let b = _mm_setr_pi16(4, 5, 6, 7);
|
||||
let r = _mm_setr_pi16(0, 4, 1, 5);
|
||||
assert_eq_m64(r, _mm_unpacklo_pi16(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpackhi_pi32() {
|
||||
let a = _mm_setr_pi32(0, 3);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
let r = _mm_setr_pi32(3, 2);
|
||||
|
||||
assert_eq_m64(r, _mm_unpackhi_pi32(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_unpacklo_pi32() {
|
||||
let a = _mm_setr_pi32(0, 3);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
let r = _mm_setr_pi32(0, 1);
|
||||
|
||||
assert_eq_m64(r, _mm_unpacklo_pi32(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_empty() {
|
||||
_mm_empty();
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_m_empty() {
|
||||
_m_empty();
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_cvtsi32_si64() {
|
||||
let a = _mm_cvtsi32_si64(42);
|
||||
let b = _mm_setr_pi32(42, 0);
|
||||
assert_eq_m64(a, b);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "mmx")]
|
||||
unsafe fn test_mm_cvtsi64_si32() {
|
||||
let a = _mm_setr_pi32(42, 666);
|
||||
let b = _mm_cvtsi64_si32(a);
|
||||
assert_eq!(b, 42);
|
||||
}
|
||||
}
|
||||
|
|
@ -6,50 +6,6 @@ use crate::{intrinsics, marker::Sized, mem::transmute};
|
|||
mod macros;
|
||||
|
||||
types! {
|
||||
/// 64-bit wide integer vector type, x86-specific
|
||||
///
|
||||
/// This type is the same as the `__m64` type defined by Intel,
|
||||
/// representing a 64-bit SIMD register. Usage of this type typically
|
||||
/// corresponds to the `mmx` target feature.
|
||||
///
|
||||
/// Internally this type may be viewed as:
|
||||
///
|
||||
/// * `i8x8` - eight `i8` variables packed together
|
||||
/// * `i16x4` - four `i16` variables packed together
|
||||
/// * `i32x2` - two `i32` variables packed together
|
||||
///
|
||||
/// (as well as unsigned versions). Each intrinsic may interpret the
|
||||
/// internal bits differently, check the documentation of the intrinsic
|
||||
/// to see how it's being used.
|
||||
///
|
||||
/// Note that this means that an instance of `__m64` typically just means
|
||||
/// a "bag of bits" which is left up to interpretation at the point of use.
|
||||
///
|
||||
/// Most intrinsics using `__m64` are prefixed with `_mm_` and the
|
||||
/// integer types tend to correspond to suffixes like "pi8" or "pi32" (not
|
||||
/// to be confused with "epiXX", used for `__m128i`).
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(stdsimd, mmx_target_feature)]
|
||||
/// #[cfg(target_arch = "x86")]
|
||||
/// use std::arch::x86::*;
|
||||
/// #[cfg(target_arch = "x86_64")]
|
||||
/// use std::arch::x86_64::*;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// # #[target_feature(enable = "mmx")]
|
||||
/// # unsafe fn foo() {
|
||||
/// let all_bytes_zero = _mm_setzero_si64();
|
||||
/// let all_bytes_one = _mm_set1_pi8(1);
|
||||
/// let two_i32 = _mm_set_pi32(1, 2);
|
||||
/// # }
|
||||
/// # if is_x86_feature_detected!("mmx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
pub struct __m64(i64);
|
||||
|
||||
/// 128-bit wide integer vector type, x86-specific
|
||||
///
|
||||
/// This type is the same as the `__m128i` type defined by Intel,
|
||||
|
|
@ -359,49 +315,6 @@ mod test;
|
|||
#[cfg(test)]
|
||||
pub use self::test::*;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m64Ext: Sized {
|
||||
fn as_m64(self) -> __m64;
|
||||
|
||||
#[inline]
|
||||
fn as_u8x8(self) -> crate::core_arch::simd::u8x8 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u16x4(self) -> crate::core_arch::simd::u16x4 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x2(self) -> crate::core_arch::simd::u32x2 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i8x8(self) -> crate::core_arch::simd::i8x8 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x4(self) -> crate::core_arch::simd::i16x4 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x2(self) -> crate::core_arch::simd::i32x2 {
|
||||
unsafe { transmute(self.as_m64()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m64Ext for __m64 {
|
||||
#[inline]
|
||||
fn as_m64(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m128iExt: Sized {
|
||||
|
|
@ -649,9 +562,6 @@ mod tbm;
|
|||
#[cfg(not(stdarch_intel_sde))]
|
||||
pub use self::tbm::*;
|
||||
|
||||
mod mmx;
|
||||
pub use self::mmx::*;
|
||||
|
||||
mod pclmulqdq;
|
||||
pub use self::pclmulqdq::*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1115,33 +1115,6 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
|
|||
movmskps(a)
|
||||
}
|
||||
|
||||
/// Sets the upper two single-precision floating-point values with 64 bits of
|
||||
/// data loaded from the address `p`; the lower two values are passed through
|
||||
/// from `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movhps))]
|
||||
// TODO: this function is actually not limited to floats, but that's what
|
||||
// matches the C type most closely: `(__m128, *const __m64) -> __m128`.
|
||||
pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
|
||||
let q = p as *const f32x2;
|
||||
let b: f32x2 = *q;
|
||||
let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
|
||||
simd_shuffle4(a, bb, [0, 1, 4, 5])
|
||||
}
|
||||
|
||||
/// Loads two floats from `p` into the lower half of a `__m128`. The upper half
|
||||
/// is copied from the upper half of `a`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(movlps))]
|
||||
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
|
||||
let q = p as *const f32x2;
|
||||
let b: f32x2 = *q;
|
||||
let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
|
||||
simd_shuffle4(a, bb, [4, 5, 2, 3])
|
||||
}
|
||||
|
||||
/// Construct a `__m128` with the lowest element read from `p` and the other
|
||||
/// elements set to zero.
|
||||
///
|
||||
|
|
@ -1270,72 +1243,6 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
|
|||
transmute(i64x2(0, ptr::read_unaligned(mem_addr as *const i64)))
|
||||
}
|
||||
|
||||
/// Stores the upper half of `a` (64 bits) into memory.
|
||||
///
|
||||
/// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
|
||||
/// choose to generate an equivalent sequence of other instructions.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
|
||||
// fine.
|
||||
// On i586 (no SSE2) it just generates plain MOV instructions.
|
||||
#[cfg_attr(
|
||||
all(test, any(target_arch = "x86_64", target_feature = "sse2"),
|
||||
not(target_os = "windows")),
|
||||
// assert_instr(movhpd)
|
||||
assert_instr(movhps) // LLVM7 prefers single-precision instructions
|
||||
)]
|
||||
pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
{
|
||||
// If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which
|
||||
// is just silly
|
||||
let a64: u64x2 = mem::transmute(a);
|
||||
let a_hi = a64.extract(1);
|
||||
*(p as *mut u64) = a_hi;
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
// If this is a `u64x2` LLVM generates a pshufd + movq, but we really
|
||||
// want a MOVHPD or MOVHPS here.
|
||||
let a64: f64x2 = mem::transmute(a);
|
||||
let a_hi = a64.extract(1);
|
||||
*p = mem::transmute(a_hi);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores the lower half of `a` (64 bits) into memory.
|
||||
///
|
||||
/// This intrinsic corresponds to the `MOVLPS` instruction. The compiler may
|
||||
/// choose to generate an equivalent sequence of other instructions.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
// On i586 the codegen just generates plain MOVs. No need to test for that.
|
||||
#[cfg_attr(
|
||||
all(
|
||||
test,
|
||||
any(target_arch = "x86_64", target_feature = "sse2"),
|
||||
not(target_os = "windows")
|
||||
),
|
||||
assert_instr(movlps)
|
||||
)]
|
||||
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
|
||||
#[cfg(target_arch = "x86")]
|
||||
{
|
||||
// Same as for _mm_storeh_pi: i586 code gen would use floating point
|
||||
// stack.
|
||||
let a64: u64x2 = mem::transmute(a);
|
||||
let a_hi = a64.extract(0);
|
||||
*(p as *mut u64) = a_hi;
|
||||
}
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
let a64: f64x2 = mem::transmute(a);
|
||||
let a_hi = a64.extract(0);
|
||||
*p = mem::transmute(a_hi);
|
||||
}
|
||||
}
|
||||
|
||||
/// Stores the lowest 32 bit float of `a` into memory.
|
||||
///
|
||||
/// This intrinsic corresponds to the `MOVSS` instruction.
|
||||
|
|
@ -1985,42 +1892,6 @@ extern "C" {
|
|||
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
|
||||
#[link_name = "llvm.x86.sse.cmp.ss"]
|
||||
fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
|
||||
#[link_name = "llvm.x86.mmx.movnt.dq"]
|
||||
fn movntdq(a: *mut __m64, b: __m64);
|
||||
#[link_name = "llvm.x86.sse.cvtpi2ps"]
|
||||
fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
|
||||
#[link_name = "llvm.x86.mmx.maskmovq"]
|
||||
fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
|
||||
#[link_name = "llvm.x86.mmx.pextr.w"]
|
||||
fn pextrw(a: __m64, imm8: i32) -> i32;
|
||||
#[link_name = "llvm.x86.mmx.pinsr.w"]
|
||||
fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmovmskb"]
|
||||
fn pmovmskb(a: __m64) -> i32;
|
||||
#[link_name = "llvm.x86.sse.pshuf.w"]
|
||||
fn pshufw(a: __m64, imm8: i8) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmaxs.w"]
|
||||
fn pmaxsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmaxu.b"]
|
||||
fn pmaxub(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmins.w"]
|
||||
fn pminsw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pminu.b"]
|
||||
fn pminub(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmulhu.w"]
|
||||
fn pmulhuw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmull.w"]
|
||||
fn pmullw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pavg.b"]
|
||||
fn pavgb(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pavg.w"]
|
||||
fn pavgw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psad.bw"]
|
||||
fn psadbw(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvtps2pi"]
|
||||
fn cvtps2pi(a: __m128) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvttps2pi"]
|
||||
fn cvttps2pi(a: __m128) -> __m64;
|
||||
}
|
||||
|
||||
/// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
|
||||
|
|
@ -2038,463 +1909,6 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
|
|||
intrinsics::nontemporal_store(mem_addr as *mut __m128, a);
|
||||
}
|
||||
|
||||
/// Stores 64-bits of integer data from `a` into memory using a non-temporal
|
||||
/// memory hint.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(movntq))]
|
||||
pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
|
||||
movntdq(mem_addr, a)
|
||||
}
|
||||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsw))]
|
||||
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmaxsw(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsw))]
|
||||
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_max_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pmaxub(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
|
||||
/// greatest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaxub))]
|
||||
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_max_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminsw))]
|
||||
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pminsw(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminsw))]
|
||||
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_min_pi16(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pminub(a, b)
|
||||
}
|
||||
|
||||
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
|
||||
/// smallest value into the result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pminub))]
|
||||
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_min_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies packed 16-bit unsigned integer values and writes the
|
||||
/// high-order 16 bits of each 32-bit product to the corresponding bits in
|
||||
/// the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhuw))]
|
||||
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
pmulhuw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies packed 16-bit integer values and writes the
|
||||
/// low-order 16 bits of each 32-bit product to the corresponding bits in
|
||||
/// the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmullw))]
|
||||
pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmullw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies packed 16-bit unsigned integer values and writes the
|
||||
/// high-order 16 bits of each 32-bit product to the corresponding bits in
|
||||
/// the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhuw))]
|
||||
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_mulhi_pu16(a, b)
|
||||
}
|
||||
|
||||
/// Computes the rounded averages of the packed unsigned 8-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
pavgb(a, b)
|
||||
}
|
||||
|
||||
/// Computes the rounded averages of the packed unsigned 8-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgb))]
|
||||
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_avg_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Computes the rounded averages of the packed unsigned 16-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgw))]
|
||||
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
|
||||
pavgw(a, b)
|
||||
}
|
||||
|
||||
/// Computes the rounded averages of the packed unsigned 16-bit integer
|
||||
/// values and writes the averages to the corresponding bits in the
|
||||
/// destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pavgw))]
|
||||
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_avg_pu16(a, b)
|
||||
}
|
||||
|
||||
/// Subtracts the corresponding 8-bit unsigned integer values of the two
|
||||
/// 64-bit vector operands and computes the absolute value for each of the
|
||||
/// differences. Then the sum of the 8 absolute differences is written to the
|
||||
/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
|
||||
psadbw(a, b)
|
||||
}
|
||||
|
||||
/// Subtracts the corresponding 8-bit unsigned integer values of the two
|
||||
/// 64-bit vector operands and computes the absolute value for each of the
|
||||
/// differences. Then the sum of the 8 absolute differences is written to the
|
||||
/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psadbw))]
|
||||
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
|
||||
_mm_sad_pu8(a, b)
|
||||
}
|
||||
|
||||
/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
|
||||
/// floating point values and writes them to the lower 64-bits of the
|
||||
/// destination. The remaining higher order elements of the destination are
|
||||
/// copied from the corresponding elements in the first operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
|
||||
cvtpi2ps(a, b)
|
||||
}
|
||||
|
||||
/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
|
||||
/// floating point values and writes them to the lower 64-bits of the
|
||||
/// destination. The remaining higher order elements of the destination are
|
||||
/// copied from the corresponding elements in the first operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
|
||||
_mm_cvtpi32_ps(a, b)
|
||||
}
|
||||
|
||||
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
let b = _mm_cmpgt_pi8(b, a);
|
||||
let b = _mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
}
|
||||
|
||||
/// Converts the lower 4 unsigned 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
let b = _mm_unpacklo_pi8(a, b);
|
||||
_mm_cvtpi16_ps(b)
|
||||
}
|
||||
|
||||
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
let b = _mm_cmpgt_pi16(b, a);
|
||||
let c = _mm_unpackhi_pi16(a, b);
|
||||
let r = _mm_setzero_ps();
|
||||
let r = cvtpi2ps(r, c);
|
||||
let r = _mm_movelh_ps(r, r);
|
||||
let c = _mm_unpacklo_pi16(a, b);
|
||||
cvtpi2ps(r, c)
|
||||
}
|
||||
|
||||
/// Converts a 64-bit vector of `u16`s into a 128-bit vector of 4 `f32`s.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
|
||||
let b = _mm_setzero_si64();
|
||||
let c = _mm_unpackhi_pi16(a, b);
|
||||
let r = _mm_setzero_ps();
|
||||
let r = cvtpi2ps(r, c);
|
||||
let r = _mm_movelh_ps(r, r);
|
||||
let c = _mm_unpacklo_pi16(a, b);
|
||||
cvtpi2ps(r, c)
|
||||
}
|
||||
|
||||
/// Converts the two 32-bit signed integer values from each 64-bit vector
|
||||
/// operand of `[2 x i32]` into a 128-bit vector of `[4 x float]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2ps))]
|
||||
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
|
||||
let c = _mm_setzero_ps();
|
||||
let c = _mm_cvtpi32_ps(c, b);
|
||||
let c = _mm_movelh_ps(c, c);
|
||||
_mm_cvtpi32_ps(c, a)
|
||||
}
|
||||
|
||||
/// Conditionally copies the values from each 8-bit element in the first
|
||||
/// 64-bit integer vector operand to the specified memory location, as
|
||||
/// specified by the most significant bit in the corresponding element in the
|
||||
/// second 64-bit integer vector operand.
|
||||
///
|
||||
/// To minimize caching, the data is flagged as non-temporal
|
||||
/// (unlikely to be used again soon).
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
maskmovq(a, mask, mem_addr)
|
||||
}
|
||||
|
||||
/// Conditionally copies the values from each 8-bit element in the first
|
||||
/// 64-bit integer vector operand to the specified memory location, as
|
||||
/// specified by the most significant bit in the corresponding element in the
|
||||
/// second 64-bit integer vector operand.
|
||||
///
|
||||
/// To minimize caching, the data is flagged as non-temporal
|
||||
/// (unlikely to be used again soon).
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(maskmovq))]
|
||||
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
|
||||
_mm_maskmove_si64(a, mask, mem_addr)
|
||||
}
|
||||
|
||||
/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
|
||||
/// returns it, as specified by the immediate integer operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => {
|
||||
pextrw(a, $imm2) as i32
|
||||
};
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
|
||||
/// returns it, as specified by the immediate integer operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => {
|
||||
pextrw(a, $imm2) as i32
|
||||
};
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
|
||||
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
|
||||
/// specified by the immediate operand `imm2`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => {
|
||||
pinsrw(a, d, $imm2)
|
||||
};
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
|
||||
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
|
||||
/// specified by the immediate operand `imm2`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => {
|
||||
pinsrw(a, d, $imm2)
|
||||
};
|
||||
}
|
||||
constify_imm2!(imm2, call)
|
||||
}
|
||||
|
||||
/// Takes the most significant bit from each 8-bit element in a 64-bit
|
||||
/// integer vector to create an 8-bit mask value. Zero-extends the value to
|
||||
/// 32-bit integer and writes it to the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
|
||||
pmovmskb(a)
|
||||
}
|
||||
|
||||
/// Takes the most significant bit from each 8-bit element in a 64-bit
|
||||
/// integer vector to create an 8-bit mask value. Zero-extends the value to
|
||||
/// 32-bit integer and writes it to the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmovmskb))]
|
||||
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
|
||||
_mm_movemask_pi8(a)
|
||||
}
|
||||
|
||||
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
|
||||
/// destination, as specified by the immediate value operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
pshufw(a, $imm8)
|
||||
};
|
||||
}
|
||||
constify_imm8!(imm8, call)
|
||||
}
|
||||
|
||||
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
|
||||
/// destination, as specified by the immediate value operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
pshufw(a, $imm8)
|
||||
};
|
||||
}
|
||||
constify_imm8!(imm8, call)
|
||||
}
|
||||
|
||||
/// Converts the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
|
||||
cvttps2pi(a)
|
||||
}
|
||||
|
||||
/// Converts the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers with truncation.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttps2pi))]
|
||||
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
|
||||
_mm_cvttps_pi32(a)
|
||||
}
|
||||
|
||||
/// Converts the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
|
||||
cvtps2pi(a)
|
||||
}
|
||||
|
||||
/// Converts the two lower packed single-precision (32-bit) floating-point
|
||||
/// elements in `a` to packed 32-bit integers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
|
||||
_mm_cvtps_pi32(a)
|
||||
}
|
||||
|
||||
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 16-bit integers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
|
||||
let b = _mm_cvtps_pi32(a);
|
||||
let a = _mm_movehl_ps(a, a);
|
||||
let c = _mm_cvtps_pi32(a);
|
||||
_mm_packs_pi32(b, c)
|
||||
}
|
||||
|
||||
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
|
||||
/// packed 8-bit integers, and returns them in the lower 4 elements of the
|
||||
/// result.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtps2pi))]
|
||||
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
|
||||
let b = _mm_cvtps_pi16(a);
|
||||
let c = _mm_setzero_si64();
|
||||
_mm_packs_pi16(b, c)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::{hint::black_box, mem::transmute};
|
||||
|
|
@ -3593,24 +3007,6 @@ mod tests {
|
|||
assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_loadh_pi() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||
let p = x[..].as_ptr();
|
||||
let r = _mm_loadh_pi(a, p as *const _);
|
||||
assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_loadl_pi() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
|
||||
let p = x[..].as_ptr();
|
||||
let r = _mm_loadl_pi(a, p as *const _);
|
||||
assert_eq_m128(r, _mm_setr_ps(5.0, 6.0, 3.0, 4.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_load_ss() {
|
||||
let a = 42.0f32;
|
||||
|
|
@ -3684,28 +3080,6 @@ mod tests {
|
|||
assert_eq_m128i(r, _mm_set_epi64x(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_storeh_pi() {
|
||||
let mut vals = [0.0f32; 8];
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
_mm_storeh_pi(vals.as_mut_ptr() as *mut _, a);
|
||||
|
||||
assert_eq!(vals[0], 3.0);
|
||||
assert_eq!(vals[1], 4.0);
|
||||
assert_eq!(vals[2], 0.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_storel_pi() {
|
||||
let mut vals = [0.0f32; 8];
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
_mm_storel_pi(vals.as_mut_ptr() as *mut _, a);
|
||||
|
||||
assert_eq!(vals[0], 1.0);
|
||||
assert_eq!(vals[1], 2.0);
|
||||
assert_eq!(vals[2], 0.0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_mm_store_ss() {
|
||||
let mut vals = [0.0f32; 8];
|
||||
|
|
@ -3926,254 +3300,4 @@ mod tests {
|
|||
assert_eq!(mem.data[i], get_m128(a, i));
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_stream_pi() {
|
||||
let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
|
||||
let mut mem = boxed::Box::<__m64>::new(transmute(i8x8::splat(1)));
|
||||
_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
|
||||
assert_eq_m64(a, *mem);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_max_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
let r = _mm_setr_pi16(5, 6, 7, 8);
|
||||
|
||||
assert_eq_m64(r, _mm_max_pi16(a, b));
|
||||
assert_eq_m64(r, _m_pmaxsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_max_pu8() {
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
|
||||
|
||||
assert_eq_m64(r, _mm_max_pu8(a, b));
|
||||
assert_eq_m64(r, _m_pmaxub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_min_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 6, -3, 8);
|
||||
let b = _mm_setr_pi16(5, -2, 7, -4);
|
||||
let r = _mm_setr_pi16(-1, -2, -3, -4);
|
||||
|
||||
assert_eq_m64(r, _mm_min_pi16(a, b));
|
||||
assert_eq_m64(r, _m_pminsw(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_min_pu8() {
|
||||
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
|
||||
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
|
||||
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
|
||||
|
||||
assert_eq_m64(r, _mm_min_pu8(a, b));
|
||||
assert_eq_m64(r, _m_pminub(a, b));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_mulhi_pu16() {
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _mm_mulhi_pu16(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_mullo_pi16() {
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _mm_mullo_pi16(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(17960));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_m_pmulhuw() {
|
||||
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
|
||||
let r = _m_pmulhuw(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(15));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_avg_pu8() {
|
||||
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
|
||||
let r = _mm_avg_pu8(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi8(6));
|
||||
|
||||
let r = _m_pavgb(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi8(6));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_avg_pu16() {
|
||||
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
|
||||
let r = _mm_avg_pu16(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(6));
|
||||
|
||||
let r = _m_pavgw(a, b);
|
||||
assert_eq_m64(r, _mm_set1_pi16(6));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_sad_pu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm_setr_pi8(
|
||||
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
|
||||
1, 2, 3, 4,
|
||||
);
|
||||
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
|
||||
let r = _mm_sad_pu8(a, b);
|
||||
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
|
||||
|
||||
let r = _m_psadbw(a, b);
|
||||
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpi32_ps() {
|
||||
let a = _mm_setr_ps(0., 0., 3., 4.);
|
||||
let b = _mm_setr_pi32(1, 2);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi32_ps(a, b);
|
||||
assert_eq_m128(r, expected);
|
||||
|
||||
let r = _mm_cvt_pi2ps(a, b);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpi16_ps() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi16_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpu16_ps() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpu16_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpi8_ps() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi8_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpu8_ps() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpu8_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtpi32x2_ps() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
let expected = _mm_setr_ps(1., 2., 3., 4.);
|
||||
let r = _mm_cvtpi32x2_ps(a, b);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_maskmove_si64() {
|
||||
let a = _mm_set1_pi8(9);
|
||||
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
|
||||
let mut r = _mm_set1_pi8(0);
|
||||
_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
|
||||
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
|
||||
assert_eq_m64(r, e);
|
||||
|
||||
let mut r = _mm_set1_pi8(0);
|
||||
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
|
||||
assert_eq_m64(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_extract_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_extract_pi16(a, 0);
|
||||
assert_eq!(r, 1);
|
||||
let r = _mm_extract_pi16(a, 1);
|
||||
assert_eq!(r, 2);
|
||||
|
||||
let r = _m_pextrw(a, 1);
|
||||
assert_eq!(r, 2);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_insert_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_insert_pi16(a, 0, 0b0);
|
||||
let expected = _mm_setr_pi16(0, 2, 3, 4);
|
||||
assert_eq_m64(r, expected);
|
||||
let r = _mm_insert_pi16(a, 0, 0b10);
|
||||
let expected = _mm_setr_pi16(1, 2, 0, 4);
|
||||
assert_eq_m64(r, expected);
|
||||
|
||||
let r = _m_pinsrw(a, 0, 0b10);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_movemask_pi8() {
|
||||
let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
|
||||
let r = _mm_movemask_pi8(a);
|
||||
assert_eq!(r, 0b10001);
|
||||
|
||||
let r = _m_pmovmskb(a);
|
||||
assert_eq!(r, 0b10001);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_shuffle_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
|
||||
let expected = _mm_setr_pi16(4, 2, 2, 1);
|
||||
assert_eq_m64(r, expected);
|
||||
|
||||
let r = _m_pshufw(a, 0b00_01_01_11);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtps_pi32() {
|
||||
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi32(1, 2);
|
||||
|
||||
assert_eq_m64(r, _mm_cvtps_pi32(a));
|
||||
assert_eq_m64(r, _mm_cvt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvttps_pi32() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi32(7, 2);
|
||||
|
||||
assert_eq_m64(r, _mm_cvttps_pi32(a));
|
||||
assert_eq_m64(r, _mm_cvtt_ps2pi(a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtps_pi16() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi16(7, 2, 3, 4);
|
||||
assert_eq_m64(r, _mm_cvtps_pi16(a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse,mmx")]
|
||||
unsafe fn test_mm_cvtps_pi8() {
|
||||
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
|
||||
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
|
||||
assert_eq_m64(r, _mm_cvtps_pi8(a));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2958,113 +2958,6 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
|
|||
simd_shuffle2(a, b, [0, 2])
|
||||
}
|
||||
|
||||
/// Adds two signed or unsigned 64-bit integer values, returning the
|
||||
/// lower 64 bits of the sum.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(paddq))]
|
||||
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
|
||||
paddq(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies 32-bit unsigned integer values contained in the lower bits
|
||||
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
|
||||
/// product.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmuludq))]
|
||||
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
|
||||
pmuludq2(a, b)
|
||||
}
|
||||
|
||||
/// Subtracts signed or unsigned 64-bit integer values and writes the
|
||||
/// difference to the corresponding bits in the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psubq))]
|
||||
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
|
||||
psubq(a, b)
|
||||
}
|
||||
|
||||
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
|
||||
/// `[2 x i32]` into two double-precision floating-point values, returned in a
|
||||
/// 128-bit vector of `[2 x double]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpi2pd))]
|
||||
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
|
||||
cvtpi2pd(a)
|
||||
}
|
||||
|
||||
/// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with
|
||||
/// the specified 64-bit integer values.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(transmute(e1), transmute(e0))
|
||||
}
|
||||
|
||||
/// Initializes both values in a 128-bit vector of `[2 x i64]` with the
|
||||
/// specified 64-bit value.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
|
||||
_mm_set_epi64x(transmute(a), transmute(a))
|
||||
}
|
||||
|
||||
/// Constructs a 128-bit integer vector, initialized in reverse order
|
||||
/// with the specified 64-bit integral values.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// no particular instruction to test
|
||||
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
|
||||
_mm_set_epi64x(transmute(e0), transmute(e1))
|
||||
}
|
||||
|
||||
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
|
||||
/// integer.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
|
||||
transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
|
||||
}
|
||||
|
||||
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
|
||||
/// upper bits.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
|
||||
// instr?
|
||||
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
|
||||
_mm_set_epi64x(0, transmute(a))
|
||||
}
|
||||
|
||||
/// Converts the two double-precision floating-point elements of a
|
||||
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
|
||||
/// returned in a 64-bit vector of `[2 x i32]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvtpd2pi))]
|
||||
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
|
||||
cvtpd2pi(a)
|
||||
}
|
||||
|
||||
/// Converts the two double-precision floating-point elements of a
|
||||
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
|
||||
/// returned in a 64-bit vector of `[2 x i32]`.
|
||||
/// If the result of either conversion is inexact, the result is truncated
|
||||
/// (rounded towards zero) regardless of the current MXCSR setting.
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2,mmx")]
|
||||
#[cfg_attr(test, assert_instr(cvttpd2pi))]
|
||||
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
|
||||
cvttpd2pi(a)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.sse2.pause"]
|
||||
|
|
@ -3207,18 +3100,6 @@ extern "C" {
|
|||
fn storeudq(mem_addr: *mut i8, a: __m128i);
|
||||
#[link_name = "llvm.x86.sse2.storeu.pd"]
|
||||
fn storeupd(mem_addr: *mut i8, a: __m128d);
|
||||
#[link_name = "llvm.x86.mmx.padd.q"]
|
||||
fn paddq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.pmulu.dq"]
|
||||
fn pmuludq2(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.mmx.psub.q"]
|
||||
fn psubq(a: __m64, b: __m64) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvtpi2pd"]
|
||||
fn cvtpi2pd(a: __m64) -> __m128d;
|
||||
#[link_name = "llvm.x86.sse.cvtpd2pi"]
|
||||
fn cvtpd2pi(a: __m128d) -> __m64;
|
||||
#[link_name = "llvm.x86.sse.cvttpd2pi"]
|
||||
fn cvttpd2pi(a: __m128d) -> __m64;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -5208,87 +5089,4 @@ mod tests {
|
|||
let r = _mm_castsi128_ps(a);
|
||||
assert_eq_m128(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_add_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
let expected = 3i64;
|
||||
let r = _mm_add_si64(transmute(a), transmute(b));
|
||||
assert_eq!(transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_mul_su32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(3, 4);
|
||||
let expected = 3u64;
|
||||
let r = _mm_mul_su32(a, b);
|
||||
assert_eq_m64(r, transmute(expected));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_sub_si64() {
|
||||
let a = 1i64;
|
||||
let b = 2i64;
|
||||
let expected = -1i64;
|
||||
let r = _mm_sub_si64(transmute(a), transmute(b));
|
||||
assert_eq!(transmute::<__m64, i64>(r), expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_cvtpi32_pd() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let expected = _mm_setr_pd(1., 2.);
|
||||
let r = _mm_cvtpi32_pd(a);
|
||||
assert_eq_m128d(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_set_epi64() {
|
||||
let r = _mm_set_epi64(transmute(1i64), transmute(2i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_set1_epi64() {
|
||||
let r = _mm_set1_epi64(transmute(1i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_setr_epi64() {
|
||||
let r = _mm_setr_epi64(transmute(1i64), transmute(2i64));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_movepi64_pi64() {
|
||||
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
|
||||
assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_movpi64_epi64() {
|
||||
let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
|
||||
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_cvtpd_pi32() {
|
||||
let a = _mm_setr_pd(5., 0.);
|
||||
let r = _mm_cvtpd_pi32(a);
|
||||
assert_eq_m64(r, _mm_setr_pi32(5, 0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2,mmx")]
|
||||
unsafe fn test_mm_cvttpd_pi32() {
|
||||
let a = _mm_setr_pd(5., 0.);
|
||||
let r = _mm_cvttpd_pi32(a);
|
||||
assert_eq_m64(r, _mm_setr_pi32(5, 0));
|
||||
|
||||
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
|
||||
let r = _mm_cvttpd_pi32(a);
|
||||
assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,169 +299,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Computes the absolute value of packed 8-bit integers in `a` and
|
||||
/// return the unsigned results.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
|
||||
pabsb(a)
|
||||
}
|
||||
|
||||
/// Computes the absolute value of packed 8-bit integers in `a`, and returns the
|
||||
/// unsigned results.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
|
||||
pabsw(a)
|
||||
}
|
||||
|
||||
/// Computes the absolute value of packed 32-bit integers in `a`, and returns the
|
||||
/// unsigned results.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
|
||||
pabsd(a)
|
||||
}
|
||||
|
||||
/// Shuffles packed 8-bit integers in `a` according to shuffle control mask in
|
||||
/// the corresponding 8-bit element of `b`, and returns the results
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
pshufb(a, b)
|
||||
}
|
||||
|
||||
/// Concatenates the two 64-bit integer vector operands, and right-shifts
|
||||
/// the result by the number of bytes specified in the immediate operand.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
#[rustc_args_required_const(2)]
|
||||
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
palignrb(a, b, $imm8)
|
||||
};
|
||||
}
|
||||
constify_imm8!(n, call)
|
||||
}
|
||||
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of `[4 x i16]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of `[2 x i32]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddd))]
|
||||
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
phaddd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phaddsw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of `[4 x i16]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubw(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of `[2 x i32]`.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubd))]
|
||||
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
phsubd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
|
||||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
phsubsw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
|
||||
/// values contained in the first source operand and packed 8-bit signed
|
||||
/// integer values contained in the second source operand, adds pairs of
|
||||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
/// the corresponding bits in the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmaddubsw(a, b)
|
||||
}
|
||||
|
||||
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
|
||||
/// products to the 18 most significant bits by right-shifting, rounds the
|
||||
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
pmulhrsw(a, b)
|
||||
}
|
||||
|
||||
/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
|
||||
/// integer in `b` is negative, and returns the results.
|
||||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
|
||||
psignb(a, b)
|
||||
}
|
||||
|
||||
/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
|
||||
/// integer in `b` is negative, and returns the results.
|
||||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
|
||||
psignw(a, b)
|
||||
}
|
||||
|
||||
/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
|
||||
/// integer in `b` is negative, and returns the results.
|
||||
/// Element in result are zeroed out when the corresponding element in `b` is
|
||||
/// zero.
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3,mmx")]
|
||||
#[cfg_attr(test, assert_instr(psignd))]
|
||||
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
|
||||
psignd(a, b)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
|
||||
|
|
@ -508,54 +345,6 @@ extern "C" {
|
|||
|
||||
#[link_name = "llvm.x86.ssse3.psign.d.128"]
|
||||
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pabs.b"]
|
||||
fn pabsb(a: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pabs.w"]
|
||||
fn pabsw(a: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pabs.d"]
|
||||
fn pabsd(a: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pshuf.b"]
|
||||
fn pshufb(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.mmx.palignr.b"]
|
||||
fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phadd.w"]
|
||||
fn phaddw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phadd.d"]
|
||||
fn phaddd(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phadd.sw"]
|
||||
fn phaddsw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phsub.w"]
|
||||
fn phsubw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phsub.d"]
|
||||
fn phsubd(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.phsub.sw"]
|
||||
fn phsubsw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
|
||||
fn pmaddubsw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
|
||||
fn pmulhrsw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.psign.b"]
|
||||
fn psignb(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.psign.w"]
|
||||
fn psignw(a: __m64, b: __m64) -> __m64;
|
||||
|
||||
#[link_name = "llvm.x86.ssse3.psign.d"]
|
||||
fn psignd(a: __m64, b: __m64) -> __m64;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -761,138 +550,4 @@ mod tests {
|
|||
let r = _mm_sign_epi32(a, b);
|
||||
assert_eq_m128i(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_abs_pi8() {
|
||||
let r = _mm_abs_pi8(_mm_set1_pi8(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi8(5));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_abs_pi16() {
|
||||
let r = _mm_abs_pi16(_mm_set1_pi16(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi16(5));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_abs_pi32() {
|
||||
let r = _mm_abs_pi32(_mm_set1_pi32(-5));
|
||||
assert_eq_m64(r, _mm_set1_pi32(5));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_shuffle_pi8() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
|
||||
let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
|
||||
let r = _mm_shuffle_pi8(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_alignr_pi8() {
|
||||
let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
|
||||
let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
|
||||
let r = _mm_alignr_pi8(a, b, 4);
|
||||
assert_eq_m64(r, transmute(0x89abcdefffddeecc_u64));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hadd_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
let expected = _mm_setr_pi16(3, 7, 132, 7);
|
||||
let r = _mm_hadd_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hadd_pi32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(4, 128);
|
||||
let expected = _mm_setr_pi32(3, 132);
|
||||
let r = _mm_hadd_pi32(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hadds_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(32767, 1, -32768, -1);
|
||||
let expected = _mm_setr_pi16(3, 7, 32767, -32768);
|
||||
let r = _mm_hadds_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hsub_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
let expected = _mm_setr_pi16(-1, -1, -124, 1);
|
||||
let r = _mm_hsub_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hsub_pi32() {
|
||||
let a = _mm_setr_pi32(1, 2);
|
||||
let b = _mm_setr_pi32(4, 128);
|
||||
let expected = _mm_setr_pi32(-1, -124);
|
||||
let r = _mm_hsub_pi32(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_hsubs_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 128, 4, 3);
|
||||
let expected = _mm_setr_pi16(-1, -1, -124, 1);
|
||||
let r = _mm_hsubs_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_maddubs_pi16() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
|
||||
let expected = _mm_setr_pi16(130, 24, 192, 194);
|
||||
let r = _mm_maddubs_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_mulhrs_pi16() {
|
||||
let a = _mm_setr_pi16(1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(4, 32767, -1, -32768);
|
||||
let expected = _mm_setr_pi16(0, 2, 0, -4);
|
||||
let r = _mm_mulhrs_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_sign_pi8() {
|
||||
let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
|
||||
let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
|
||||
let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
|
||||
let r = _mm_sign_pi8(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_sign_pi16() {
|
||||
let a = _mm_setr_pi16(-1, 2, 3, 4);
|
||||
let b = _mm_setr_pi16(1, -1, 1, 0);
|
||||
let expected = _mm_setr_pi16(-1, -2, 3, 0);
|
||||
let r = _mm_sign_pi16(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "ssse3,mmx")]
|
||||
unsafe fn test_mm_sign_pi32() {
|
||||
let a = _mm_setr_pi32(-1, 2);
|
||||
let b = _mm_setr_pi32(1, 0);
|
||||
let expected = _mm_setr_pi32(-1, 0);
|
||||
let r = _mm_sign_pi32(a, b);
|
||||
assert_eq_m64(r, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,15 +2,6 @@
|
|||
|
||||
use crate::core_arch::x86::*;
|
||||
|
||||
#[target_feature(enable = "mmx")]
|
||||
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
|
||||
union A {
|
||||
a: __m64,
|
||||
b: u64,
|
||||
}
|
||||
assert_eq!(A { a }.b, A { a: b }.b)
|
||||
}
|
||||
|
||||
#[target_feature(enable = "sse2")]
|
||||
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
|
||||
union A {
|
||||
|
|
|
|||
|
|
@ -44,8 +44,6 @@ pub fn simd_test(
|
|||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let mmx = target_features.iter().any(|s| s.starts_with("mmx"));
|
||||
|
||||
let enable_feature = string(enable_feature);
|
||||
let item = TokenStream::from(item);
|
||||
let name = find_name(item.clone());
|
||||
|
|
@ -106,15 +104,6 @@ pub fn simd_test(
|
|||
TokenStream::new()
|
||||
};
|
||||
|
||||
let emms = if mmx {
|
||||
// note: if the test requires MMX we need to clear the FPU
|
||||
// registers once the test finishes before interfacing with
|
||||
// other x87 code:
|
||||
quote! { unsafe { super::_mm_empty() }; }
|
||||
} else {
|
||||
TokenStream::new()
|
||||
};
|
||||
|
||||
let ret: TokenStream = quote_spanned! {
|
||||
proc_macro2::Span::call_site() =>
|
||||
#[allow(non_snake_case)]
|
||||
|
|
@ -123,7 +112,6 @@ pub fn simd_test(
|
|||
fn #name() {
|
||||
if #force_test | (#cfg_target_features) {
|
||||
let v = unsafe { #name() };
|
||||
#emms
|
||||
return v;
|
||||
} else {
|
||||
::stdarch_test::assert_skip_test_ok(stringify!(#name));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue