Bye bye MMX! (#890)

Mateusz Mikuła 2020-09-03 15:12:19 +02:00 committed by GitHub
parent e947c5c073
commit c06b820716
8 changed files with 0 additions and 2321 deletions

View file

@@ -21,7 +21,6 @@
stdsimd,
staged_api,
doc_cfg,
mmx_target_feature,
tbm_target_feature,
sse4a_target_feature,
arm_target_feature,

View file

@@ -1,786 +0,0 @@
//! `i586` MMX instruction set.
//!
//! The intrinsics here roughly correspond to those in the `mmintrin.h` C
//! header.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
use crate::{
core_arch::{simd::*, x86::*},
mem::transmute,
};
#[cfg(test)]
use stdarch_test::assert_instr;
/// Constructs a 64-bit integer vector initialized to zero.
#[inline]
#[target_feature(enable = "mmx")]
// FIXME: this produces a movl instead of xorps on x86
// FIXME: this produces a xor intrinsic instead of xorps on x86_64
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(xor))]
pub unsafe fn _mm_setzero_si64() -> __m64 {
transmute(0_i64)
}
/// Adds packed 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddb))]
pub unsafe fn _mm_add_pi8(a: __m64, b: __m64) -> __m64 {
paddb(a, b)
}
/// Adds packed 8-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddb))]
pub unsafe fn _m_paddb(a: __m64, b: __m64) -> __m64 {
_mm_add_pi8(a, b)
}
/// Adds packed 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddw))]
pub unsafe fn _mm_add_pi16(a: __m64, b: __m64) -> __m64 {
paddw(a, b)
}
/// Adds packed 16-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddw))]
pub unsafe fn _m_paddw(a: __m64, b: __m64) -> __m64 {
_mm_add_pi16(a, b)
}
/// Adds packed 32-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _mm_add_pi32(a: __m64, b: __m64) -> __m64 {
paddd(a, b)
}
/// Adds packed 32-bit integers in `a` and `b`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddd))]
pub unsafe fn _m_paddd(a: __m64, b: __m64) -> __m64 {
_mm_add_pi32(a, b)
}
/// Adds packed 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _mm_adds_pi8(a: __m64, b: __m64) -> __m64 {
paddsb(a, b)
}
/// Adds packed 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsb))]
pub unsafe fn _m_paddsb(a: __m64, b: __m64) -> __m64 {
_mm_adds_pi8(a, b)
}
/// Adds packed 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _mm_adds_pi16(a: __m64, b: __m64) -> __m64 {
paddsw(a, b)
}
/// Adds packed 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddsw))]
pub unsafe fn _m_paddsw(a: __m64, b: __m64) -> __m64 {
_mm_adds_pi16(a, b)
}
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _mm_adds_pu8(a: __m64, b: __m64) -> __m64 {
paddusb(a, b)
}
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusb))]
pub unsafe fn _m_paddusb(a: __m64, b: __m64) -> __m64 {
_mm_adds_pu8(a, b)
}
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _mm_adds_pu16(a: __m64, b: __m64) -> __m64 {
paddusw(a, b)
}
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(paddusw))]
pub unsafe fn _m_paddusw(a: __m64, b: __m64) -> __m64 {
_mm_adds_pu16(a, b)
}
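// A scalar sketch of the saturating lane rule shared by all eight functions
// above; Rust's built-in saturating arithmetic models it directly (these
// helpers are illustrative only, not part of the original API):
fn adds_pi8_lane(a: i8, b: i8) -> i8 {
    // Signed saturation clamps the sum to [i8::MIN, i8::MAX].
    a.saturating_add(b)
}
fn adds_pu8_lane(a: u8, b: u8) -> u8 {
    // Unsigned saturation clamps the sum to [0, u8::MAX].
    a.saturating_add(b)
}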
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _mm_sub_pi8(a: __m64, b: __m64) -> __m64 {
psubb(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubb))]
pub unsafe fn _m_psubb(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi8(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _mm_sub_pi16(a: __m64, b: __m64) -> __m64 {
psubw(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubw))]
pub unsafe fn _m_psubw(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi16(a, b)
}
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _mm_sub_pi32(a: __m64, b: __m64) -> __m64 {
psubd(a, b)
}
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubd))]
pub unsafe fn _m_psubd(a: __m64, b: __m64) -> __m64 {
_mm_sub_pi32(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _mm_subs_pi8(a: __m64, b: __m64) -> __m64 {
psubsb(a, b)
}
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsb))]
pub unsafe fn _m_psubsb(a: __m64, b: __m64) -> __m64 {
_mm_subs_pi8(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _mm_subs_pi16(a: __m64, b: __m64) -> __m64 {
psubsw(a, b)
}
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubsw))]
pub unsafe fn _m_psubsw(a: __m64, b: __m64) -> __m64 {
_mm_subs_pi16(a, b)
}
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _mm_subs_pu8(a: __m64, b: __m64) -> __m64 {
psubusb(a, b)
}
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusb))]
pub unsafe fn _m_psubusb(a: __m64, b: __m64) -> __m64 {
_mm_subs_pu8(a, b)
}
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _mm_subs_pu16(a: __m64, b: __m64) -> __m64 {
psubusw(a, b)
}
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned
/// 16-bit integers in `a` using saturation.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(psubusw))]
pub unsafe fn _m_psubusw(a: __m64, b: __m64) -> __m64 {
_mm_subs_pu16(a, b)
}
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
/// less than -0x80 are saturated to -0x80.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packsswb))]
pub unsafe fn _mm_packs_pi16(a: __m64, b: __m64) -> __m64 {
packsswb(a, b)
}
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
/// values less than -0x8000 are saturated to -0x8000.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(packssdw))]
pub unsafe fn _mm_packs_pi32(a: __m64, b: __m64) -> __m64 {
packssdw(a, b)
}
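// A scalar model of the saturating narrowing both pack intrinsics perform
// (illustrative only): each wide lane is clamped into the narrower signed
// range, then truncated.
fn packs_i16_to_i8(x: i16) -> i8 {
    x.clamp(i8::MIN as i16, i8::MAX as i16) as i8
}
fn packs_i32_to_i16(x: i32) -> i16 {
    x.clamp(i16::MIN as i32, i16::MAX as i32) as i16
}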
/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: __m64, b: __m64) -> __m64 {
pcmpgtb(a, b)
}
/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: __m64, b: __m64) -> __m64 {
pcmpgtw(a, b)
}
/// Compares whether each element of `a` is greater than the corresponding
/// element of `b` returning `0` for `false` and `-1` for `true`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
pub unsafe fn _mm_cmpgt_pi32(a: __m64, b: __m64) -> __m64 {
pcmpgtd(a, b)
}
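// The comparisons return lane masks rather than booleans: every bit of a
// lane is set on "true" and cleared on "false". A scalar sketch of one
// 8-bit lane:
fn cmpgt_lane(a: i8, b: i8) -> i8 {
    if a > b { -1 } else { 0 } // -1 is the all-ones pattern in two's complement
}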
/// Unpacks the upper two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.2, b.2, a.3, b.3]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
pub unsafe fn _mm_unpackhi_pi16(a: __m64, b: __m64) -> __m64 {
punpckhwd(a, b)
}
/// Unpacks the upper four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.4, b.4, a.5, b.5, a.6, b.6, a.7, b.7]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhbw))]
pub unsafe fn _mm_unpackhi_pi8(a: __m64, b: __m64) -> __m64 {
punpckhbw(a, b)
}
/// Unpacks the lower four elements from two `i8x8` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1, a.2, b.2, a.3, b.3]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: __m64, b: __m64) -> __m64 {
punpcklbw(a, b)
}
/// Unpacks the lower two elements from two `i16x4` vectors and interleaves
/// them into the result: `[a.0, b.0, a.1, b.1]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: __m64, b: __m64) -> __m64 {
punpcklwd(a, b)
}
/// Unpacks the upper element from two `i32x2` vectors and interleaves them
/// into the result: `[a.1, b.1]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckhdq))]
pub unsafe fn _mm_unpackhi_pi32(a: __m64, b: __m64) -> __m64 {
punpckhdq(a, b)
}
/// Unpacks the lower element from two `i32x2` vectors and interleaves them
/// into the result: `[a.0, b.0]`.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(punpckldq))]
pub unsafe fn _mm_unpacklo_pi32(a: __m64, b: __m64) -> __m64 {
punpckldq(a, b)
}
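// All six unpack intrinsics follow one pattern: take the chosen half of each
// input and interleave the lanes. A sketch of the low-half 8-bit case, where
// index 0 is the least significant lane (illustrative helper):
fn unpacklo_i8x8(a: [i8; 8], b: [i8; 8]) -> [i8; 8] {
    [a[0], b[0], a[1], b[1], a[2], b[2], a[3], b[3]]
}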
/// Sets packed 16-bit integers in dst with the supplied values.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi16(e3: i16, e2: i16, e1: i16, e0: i16) -> __m64 {
_mm_setr_pi16(e0, e1, e2, e3)
}
/// Sets packed 32-bit integers in dst with the supplied values.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi32(e1: i32, e0: i32) -> __m64 {
_mm_setr_pi32(e0, e1)
}
/// Sets packed 8-bit integers in dst with the supplied values.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set_pi8(e7: i8, e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> __m64 {
_mm_setr_pi8(e0, e1, e2, e3, e4, e5, e6, e7)
}
/// Broadcasts 16-bit integer `a` to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi16(a: i16) -> __m64 {
_mm_setr_pi16(a, a, a, a)
}
/// Broadcasts 32-bit integer `a` to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi32(a: i32) -> __m64 {
_mm_setr_pi32(a, a)
}
/// Broadcasts 8-bit integer `a` to all elements of dst.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_set1_pi8(a: i8) -> __m64 {
_mm_setr_pi8(a, a, a, a, a, a, a, a)
}
/// Sets packed 16-bit integers in dst with the supplied values in reverse
/// order.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi16(e0: i16, e1: i16, e2: i16, e3: i16) -> __m64 {
transmute(i16x4::new(e0, e1, e2, e3))
}
/// Sets packed 32-bit integers in dst with the supplied values in reverse
/// order.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi32(e0: i32, e1: i32) -> __m64 {
transmute(i32x2::new(e0, e1))
}
/// Sets packed 8-bit integers in dst with the supplied values in reverse order.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_setr_pi8(
e0: i8,
e1: i8,
e2: i8,
e3: i8,
e4: i8,
e5: i8,
e6: i8,
e7: i8,
) -> __m64 {
transmute(i8x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
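// A note on argument order: the `set` constructors take the highest-index
// element first, while the `setr` constructors take elements in memory
// (reverse) order. An illustrative sketch (the helper below is hypothetical):
unsafe fn set_vs_setr_demo() -> (__m64, __m64) {
    // Both calls build the same vector; lane 0 is the least significant lane.
    (_mm_set_pi16(3, 2, 1, 0), _mm_setr_pi16(0, 1, 2, 3))
}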
/// Empty the MMX state, which marks the x87 FPU registers as available for use
/// by x87 instructions. This instruction must be used at the end of all MMX
/// technology procedures.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(emms))]
pub unsafe fn _mm_empty() {
emms()
}
/// Empty the MMX state, which marks the x87 FPU registers as available for use
/// by x87 instructions. This instruction must be used at the end of all MMX
/// technology procedures.
#[inline]
#[target_feature(enable = "mmx")]
#[cfg_attr(test, assert_instr(emms))]
pub unsafe fn _m_empty() {
emms()
}
/// Copies 32-bit integer `a` to the lower element of the return value, and
/// zeroes the upper element of the return value.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_cvtsi32_si64(a: i32) -> __m64 {
transmute(i32x2::new(a, 0))
}
/// Return the lower 32-bit integer in `a`.
#[inline]
#[target_feature(enable = "mmx")]
pub unsafe fn _mm_cvtsi64_si32(a: __m64) -> i32 {
let r: i32x2 = transmute(a);
r.0
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.padd.b"]
fn paddb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.padd.w"]
fn paddw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.padd.d"]
fn paddd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.padds.b"]
fn paddsb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.padds.w"]
fn paddsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.paddus.b"]
fn paddusb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.paddus.w"]
fn paddusw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.b"]
fn psubb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.w"]
fn psubw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.d"]
fn psubd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubs.b"]
fn psubsb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubs.w"]
fn psubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubus.b"]
fn psubusb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psubus.w"]
fn psubusw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.packsswb"]
fn packsswb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.packssdw"]
fn packssdw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.b"]
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pcmpgt.d"]
fn pcmpgtd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhwd"]
fn punpckhwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklwd"]
fn punpcklwd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhbw"]
fn punpckhbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckhdq"]
fn punpckhdq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.punpckldq"]
fn punpckldq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.emms"]
fn emms();
}
#[cfg(test)]
mod tests {
use crate::core_arch::x86::*;
use stdarch_test::simd_test;
#[simd_test(enable = "mmx")]
unsafe fn test_mm_setzero_si64() {
let r: __m64 = transmute(0_i64);
assert_eq_m64(r, _mm_setzero_si64());
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_add_pi8() {
let a = _mm_setr_pi8(-1, -1, 1, 1, -1, 0, 1, 0);
let b = _mm_setr_pi8(-127, 101, 99, 126, 0, -1, 0, 1);
let e = _mm_setr_pi8(-128, 100, 100, 127, -1, -1, 1, 1);
assert_eq_m64(e, _mm_add_pi8(a, b));
assert_eq_m64(e, _m_paddb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_add_pi16() {
let a = _mm_setr_pi16(-1, -1, 1, 1);
let b = _mm_setr_pi16(i16::MIN + 1, 30001, -30001, i16::MAX - 1);
let e = _mm_setr_pi16(i16::MIN, 30000, -30000, i16::MAX);
assert_eq_m64(e, _mm_add_pi16(a, b));
assert_eq_m64(e, _m_paddw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_add_pi32() {
let a = _mm_setr_pi32(1, -1);
let b = _mm_setr_pi32(i32::MAX - 1, i32::MIN + 1);
let e = _mm_setr_pi32(i32::MAX, i32::MIN);
assert_eq_m64(e, _mm_add_pi32(a, b));
assert_eq_m64(e, _m_paddd(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_adds_pi8() {
let a = _mm_setr_pi8(-100, -1, 1, 100, -1, 0, 1, 0);
let b = _mm_setr_pi8(-100, 1, -1, 100, 0, -1, 0, 1);
let e = _mm_setr_pi8(i8::MIN, 0, 0, i8::MAX, -1, -1, 1, 1);
assert_eq_m64(e, _mm_adds_pi8(a, b));
assert_eq_m64(e, _m_paddsb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_adds_pi16() {
let a = _mm_setr_pi16(-32000, 32000, 4, 0);
let b = _mm_setr_pi16(-32000, 32000, -5, 1);
let e = _mm_setr_pi16(i16::MIN, i16::MAX, -1, 1);
assert_eq_m64(e, _mm_adds_pi16(a, b));
assert_eq_m64(e, _m_paddsw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_adds_pu8() {
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 200u8 as i8);
let b = _mm_setr_pi8(0, 10, 20, 30, 40, 50, 60, 200u8 as i8);
let e = _mm_setr_pi8(0, 11, 22, 33, 44, 55, 66, u8::MAX as i8);
assert_eq_m64(e, _mm_adds_pu8(a, b));
assert_eq_m64(e, _m_paddusb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_adds_pu16() {
let a = _mm_setr_pi16(0, 1, 2, 60000u16 as i16);
let b = _mm_setr_pi16(0, 10, 20, 60000u16 as i16);
let e = _mm_setr_pi16(0, 11, 22, u16::MAX as i16);
assert_eq_m64(e, _mm_adds_pu16(a, b));
assert_eq_m64(e, _m_paddusw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_sub_pi8() {
let a = _mm_setr_pi8(0, 0, 1, 1, -1, -1, 0, 0);
let b = _mm_setr_pi8(-1, 1, -2, 2, 100, -100, -127, 127);
let e = _mm_setr_pi8(1, -1, 3, -1, -101, 99, 127, -127);
assert_eq_m64(e, _mm_sub_pi8(a, b));
assert_eq_m64(e, _m_psubb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_sub_pi16() {
let a = _mm_setr_pi16(-20000, -20000, 20000, 30000);
let b = _mm_setr_pi16(-10000, 10000, -10000, 30000);
let e = _mm_setr_pi16(-10000, -30000, 30000, 0);
assert_eq_m64(e, _mm_sub_pi16(a, b));
assert_eq_m64(e, _m_psubw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_sub_pi32() {
let a = _mm_setr_pi32(500_000, -500_000);
let b = _mm_setr_pi32(500_000, 500_000);
let e = _mm_setr_pi32(0, -1_000_000);
assert_eq_m64(e, _mm_sub_pi32(a, b));
assert_eq_m64(e, _m_psubd(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_subs_pi8() {
let a = _mm_setr_pi8(-100, 100, 0, 0, 0, 0, -5, 5);
let b = _mm_setr_pi8(100, -100, i8::MIN, 127, -1, 1, 3, -3);
let e = _mm_setr_pi8(i8::MIN, i8::MAX, i8::MAX, -127, 1, -1, -8, 8);
assert_eq_m64(e, _mm_subs_pi8(a, b));
assert_eq_m64(e, _m_psubsb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_subs_pi16() {
let a = _mm_setr_pi16(-20000, 20000, 0, 0);
let b = _mm_setr_pi16(20000, -20000, -1, 1);
let e = _mm_setr_pi16(i16::MIN, i16::MAX, 1, -1);
assert_eq_m64(e, _mm_subs_pi16(a, b));
assert_eq_m64(e, _m_psubsw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_subs_pu8() {
let a = _mm_setr_pi8(50, 10, 20, 30, 40, 60, 70, 80);
let b = _mm_setr_pi8(60, 20, 30, 40, 30, 20, 10, 0);
let e = _mm_setr_pi8(0, 0, 0, 0, 10, 40, 60, 80);
assert_eq_m64(e, _mm_subs_pu8(a, b));
assert_eq_m64(e, _m_psubusb(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_subs_pu16() {
let a = _mm_setr_pi16(10000, 200, 0, 44444u16 as i16);
let b = _mm_setr_pi16(20000, 300, 1, 11111);
let e = _mm_setr_pi16(0, 0, 0, 33333u16 as i16);
assert_eq_m64(e, _mm_subs_pu16(a, b));
assert_eq_m64(e, _m_psubusw(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_packs_pi16() {
let a = _mm_setr_pi16(-1, 2, -3, 4);
let b = _mm_setr_pi16(-5, 6, -7, 8);
let r = _mm_setr_pi8(-1, 2, -3, 4, -5, 6, -7, 8);
assert_eq_m64(r, _mm_packs_pi16(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_packs_pi32() {
let a = _mm_setr_pi32(-1, 2);
let b = _mm_setr_pi32(-5, 6);
let r = _mm_setr_pi16(-1, 2, -5, 6);
assert_eq_m64(r, _mm_packs_pi32(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_cmpgt_pi8() {
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_setr_pi8(8, 7, 6, 5, 4, 3, 2, 1);
let r = _mm_setr_pi8(0, 0, 0, 0, 0, -1, -1, -1);
assert_eq_m64(r, _mm_cmpgt_pi8(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_cmpgt_pi16() {
let a = _mm_setr_pi16(0, 1, 2, 3);
let b = _mm_setr_pi16(4, 3, 2, 1);
let r = _mm_setr_pi16(0, 0, 0, -1);
assert_eq_m64(r, _mm_cmpgt_pi16(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_cmpgt_pi32() {
let a = _mm_setr_pi32(0, 3);
let b = _mm_setr_pi32(1, 2);
let r0 = _mm_setr_pi32(0, -1);
let r1 = _mm_setr_pi32(-1, 0);
assert_eq_m64(r0, _mm_cmpgt_pi32(a, b));
assert_eq_m64(r1, _mm_cmpgt_pi32(b, a));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpackhi_pi8() {
let a = _mm_setr_pi8(0, 3, 4, 7, 8, 11, 12, 15);
let b = _mm_setr_pi8(1, 2, 5, 6, 9, 10, 13, 14);
let r = _mm_setr_pi8(8, 9, 11, 10, 12, 13, 15, 14);
assert_eq_m64(r, _mm_unpackhi_pi8(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpacklo_pi8() {
let a = _mm_setr_pi8(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_setr_pi8(8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm_setr_pi8(0, 8, 1, 9, 2, 10, 3, 11);
assert_eq_m64(r, _mm_unpacklo_pi8(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpackhi_pi16() {
let a = _mm_setr_pi16(0, 1, 2, 3);
let b = _mm_setr_pi16(4, 5, 6, 7);
let r = _mm_setr_pi16(2, 6, 3, 7);
assert_eq_m64(r, _mm_unpackhi_pi16(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpacklo_pi16() {
let a = _mm_setr_pi16(0, 1, 2, 3);
let b = _mm_setr_pi16(4, 5, 6, 7);
let r = _mm_setr_pi16(0, 4, 1, 5);
assert_eq_m64(r, _mm_unpacklo_pi16(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpackhi_pi32() {
let a = _mm_setr_pi32(0, 3);
let b = _mm_setr_pi32(1, 2);
let r = _mm_setr_pi32(3, 2);
assert_eq_m64(r, _mm_unpackhi_pi32(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_unpacklo_pi32() {
let a = _mm_setr_pi32(0, 3);
let b = _mm_setr_pi32(1, 2);
let r = _mm_setr_pi32(0, 1);
assert_eq_m64(r, _mm_unpacklo_pi32(a, b));
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_empty() {
_mm_empty();
}
#[simd_test(enable = "mmx")]
unsafe fn test_m_empty() {
_m_empty();
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_cvtsi32_si64() {
let a = _mm_cvtsi32_si64(42);
let b = _mm_setr_pi32(42, 0);
assert_eq_m64(a, b);
}
#[simd_test(enable = "mmx")]
unsafe fn test_mm_cvtsi64_si32() {
let a = _mm_setr_pi32(42, 666);
let b = _mm_cvtsi64_si32(a);
assert_eq!(b, 42);
}
}

View file

@@ -6,50 +6,6 @@ use crate::{intrinsics, marker::Sized, mem::transmute};
mod macros;
types! {
/// 64-bit wide integer vector type, x86-specific
///
/// This type is the same as the `__m64` type defined by Intel,
/// representing a 64-bit SIMD register. Usage of this type typically
/// corresponds to the `mmx` target feature.
///
/// Internally this type may be viewed as:
///
/// * `i8x8` - eight `i8` variables packed together
/// * `i16x4` - four `i16` variables packed together
/// * `i32x2` - two `i32` variables packed together
///
/// (as well as unsigned versions). Each intrinsic may interpret the
/// internal bits differently, check the documentation of the intrinsic
/// to see how it's being used.
///
/// Note that this means that an instance of `__m64` typically just means
/// a "bag of bits" which is left up to interpretation at the point of use.
///
/// Most intrinsics using `__m64` are prefixed with `_mm_` and the
/// integer types tend to correspond to suffixes like "pi8" or "pi32" (not
/// to be confused with "epiXX", used for `__m128i`).
///
/// # Examples
///
/// ```
/// # #![feature(stdsimd, mmx_target_feature)]
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// # #[target_feature(enable = "mmx")]
/// # unsafe fn foo() {
/// let all_bytes_zero = _mm_setzero_si64();
/// let all_bytes_one = _mm_set1_pi8(1);
/// let two_i32 = _mm_set_pi32(1, 2);
/// # }
/// # if is_x86_feature_detected!("mmx") { unsafe { foo() } }
/// # }
/// ```
pub struct __m64(i64);
/// 128-bit wide integer vector type, x86-specific
///
/// This type is the same as the `__m128i` type defined by Intel,
@@ -359,49 +315,6 @@ mod test;
#[cfg(test)]
pub use self::test::*;
#[allow(non_camel_case_types)]
#[unstable(feature = "stdimd_internal", issue = "none")]
pub(crate) trait m64Ext: Sized {
fn as_m64(self) -> __m64;
#[inline]
fn as_u8x8(self) -> crate::core_arch::simd::u8x8 {
unsafe { transmute(self.as_m64()) }
}
#[inline]
fn as_u16x4(self) -> crate::core_arch::simd::u16x4 {
unsafe { transmute(self.as_m64()) }
}
#[inline]
fn as_u32x2(self) -> crate::core_arch::simd::u32x2 {
unsafe { transmute(self.as_m64()) }
}
#[inline]
fn as_i8x8(self) -> crate::core_arch::simd::i8x8 {
unsafe { transmute(self.as_m64()) }
}
#[inline]
fn as_i16x4(self) -> crate::core_arch::simd::i16x4 {
unsafe { transmute(self.as_m64()) }
}
#[inline]
fn as_i32x2(self) -> crate::core_arch::simd::i32x2 {
unsafe { transmute(self.as_m64()) }
}
}
impl m64Ext for __m64 {
#[inline]
fn as_m64(self) -> Self {
self
}
}
#[allow(non_camel_case_types)]
#[unstable(feature = "stdimd_internal", issue = "none")]
pub(crate) trait m128iExt: Sized {
@@ -649,9 +562,6 @@ mod tbm;
#[cfg(not(stdarch_intel_sde))]
pub use self::tbm::*;
mod mmx;
pub use self::mmx::*;
mod pclmulqdq;
pub use self::pclmulqdq::*;

View file

@@ -1115,33 +1115,6 @@ pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
movmskps(a)
}
/// Sets the upper two single-precision floating-point values with 64 bits of
/// data loaded from the address `p`; the lower two values are passed through
/// from `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movhps))]
// TODO: this function is actually not limited to floats, but that's what
// matches the C type most closely: `(__m128, *const __m64) -> __m128`.
pub unsafe fn _mm_loadh_pi(a: __m128, p: *const __m64) -> __m128 {
let q = p as *const f32x2;
let b: f32x2 = *q;
let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
simd_shuffle4(a, bb, [0, 1, 4, 5])
}
/// Loads two floats from `p` into the lower half of a `__m128`. The upper half
/// is copied from the upper half of `a`.
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(movlps))]
pub unsafe fn _mm_loadl_pi(a: __m128, p: *const __m64) -> __m128 {
let q = p as *const f32x2;
let b: f32x2 = *q;
let bb = simd_shuffle4(b, b, [0, 1, 0, 1]);
simd_shuffle4(a, bb, [4, 5, 2, 3])
}
/// Construct a `__m128` with the lowest element read from `p` and the other
/// elements set to zero.
///
@@ -1270,72 +1243,6 @@ pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
transmute(i64x2(0, ptr::read_unaligned(mem_addr as *const i64)))
}
/// Stores the upper half of `a` (64 bits) into memory.
///
/// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
/// choose to generate an equivalent sequence of other instructions.
#[inline]
#[target_feature(enable = "sse")]
// On i686 and up LLVM actually generates MOVHPD instead of MOVHPS, that's
// fine.
// On i586 (no SSE2) it just generates plain MOV instructions.
#[cfg_attr(
all(
test,
any(target_arch = "x86_64", target_feature = "sse2"),
not(target_os = "windows")
),
// assert_instr(movhpd)
assert_instr(movhps) // LLVM7 prefers single-precision instructions
)]
pub unsafe fn _mm_storeh_pi(p: *mut __m64, a: __m128) {
#[cfg(target_arch = "x86")]
{
// If this is a `f64x2` then on i586, LLVM generates fldl & fstpl which
// is just silly
let a64: u64x2 = mem::transmute(a);
let a_hi = a64.extract(1);
*(p as *mut u64) = a_hi;
}
#[cfg(target_arch = "x86_64")]
{
// If this is a `u64x2` LLVM generates a pshufd + movq, but we really
// want a MOVHPD or MOVHPS here.
let a64: f64x2 = mem::transmute(a);
let a_hi = a64.extract(1);
*p = mem::transmute(a_hi);
}
}
/// Stores the lower half of `a` (64 bits) into memory.
///
/// This intrinsic corresponds to the `MOVQ` instruction. The compiler may
/// choose to generate an equivalent sequence of other instructions.
#[inline]
#[target_feature(enable = "sse")]
// On i586 the codegen just generates plain MOVs. No need to test for that.
#[cfg_attr(
all(
test,
any(target_arch = "x86_64", target_feature = "sse2"),
not(target_os = "windows")
),
assert_instr(movlps)
)]
pub unsafe fn _mm_storel_pi(p: *mut __m64, a: __m128) {
#[cfg(target_arch = "x86")]
{
// Same as for _mm_storeh_pi: i586 code gen would use floating point
// stack.
let a64: u64x2 = mem::transmute(a);
let a_hi = a64.extract(0);
*(p as *mut u64) = a_hi;
}
#[cfg(target_arch = "x86_64")]
{
let a64: f64x2 = mem::transmute(a);
let a_hi = a64.extract(0);
*p = mem::transmute(a_hi);
}
}
/// Stores the lowest 32 bit float of `a` into memory.
///
/// This intrinsic corresponds to the `MOVSS` instruction.
@@ -1985,42 +1892,6 @@ extern "C" {
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
#[link_name = "llvm.x86.sse.cmp.ss"]
fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
#[link_name = "llvm.x86.mmx.movnt.dq"]
fn movntdq(a: *mut __m64, b: __m64);
#[link_name = "llvm.x86.sse.cvtpi2ps"]
fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
#[link_name = "llvm.x86.mmx.maskmovq"]
fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
#[link_name = "llvm.x86.mmx.pextr.w"]
fn pextrw(a: __m64, imm8: i32) -> i32;
#[link_name = "llvm.x86.mmx.pinsr.w"]
fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
#[link_name = "llvm.x86.mmx.pmovmskb"]
fn pmovmskb(a: __m64) -> i32;
#[link_name = "llvm.x86.sse.pshuf.w"]
fn pshufw(a: __m64, imm8: i8) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxs.w"]
fn pmaxsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxu.b"]
fn pmaxub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmins.w"]
fn pminsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pminu.b"]
fn pminub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmulhu.w"]
fn pmulhuw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmull.w"]
fn pmullw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.b"]
fn pavgb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.w"]
fn pavgw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psad.bw"]
fn psadbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtps2pi"]
fn cvtps2pi(a: __m128) -> __m64;
#[link_name = "llvm.x86.sse.cvttps2pi"]
fn cvttps2pi(a: __m128) -> __m64;
}
/// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
@@ -2038,463 +1909,6 @@ pub unsafe fn _mm_stream_ps(mem_addr: *mut f32, a: __m128) {
intrinsics::nontemporal_store(mem_addr as *mut __m128, a);
}
/// Stores 64 bits of integer data from `a` into memory using a non-temporal
/// memory hint.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(movntq))]
pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
movntdq(mem_addr, a)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
pmaxsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
_mm_max_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
pmaxub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
_mm_max_pu8(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
pminsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
_mm_min_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
pminub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
_mm_min_pu8(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
pmulhuw(a, b)
}
/// Multiplies packed 16-bit integer values and writes the
/// low-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmullw))]
pub unsafe fn _mm_mullo_pi16(a: __m64, b: __m64) -> __m64 {
pmullw(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
_mm_mulhi_pu16(a, b)
}
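// A per-lane model of the multiply intrinsics above (illustrative only):
// both operations compute a full 32-bit product and keep opposite halves.
fn mulhi_u16_lane(a: u16, b: u16) -> u16 {
    ((u32::from(a) * u32::from(b)) >> 16) as u16 // high half of the product
}
fn mullo_i16_lane(a: i16, b: i16) -> i16 {
    a.wrapping_mul(b) // low half of the product, wrapping on overflow
}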
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
pavgb(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu8(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
pavgw(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu16(a, b)
}
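// The "rounded average" adds one before halving, so halfway results round
// up. A scalar sketch of one unsigned 8-bit lane:
fn avg_u8_lane(a: u8, b: u8) -> u8 {
    ((u16::from(a) + u16::from(b) + 1) >> 1) as u8
}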
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value of each
/// difference. The sum of the 8 absolute differences is then written to
/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
psadbw(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value of each
/// difference. The sum of the 8 absolute differences is then written to
/// bits `[15:0]` of the destination; the remaining bits `[63:16]` are cleared.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
_mm_sad_pu8(a, b)
}
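// A scalar model of the sum of absolute differences (illustrative only):
// the single 16-bit sum lands in lane 0 and the other lanes are zeroed.
fn sad_u8x8(a: [u8; 8], b: [u8; 8]) -> [u16; 4] {
    let sum: u16 = a
        .iter()
        .zip(b.iter())
        .map(|(&x, &y)| (i16::from(x) - i16::from(y)).unsigned_abs())
        .sum();
    [sum, 0, 0, 0]
}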
/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
cvtpi2ps(a, b)
}
/// Converts two elements of a 64-bit vector of `[2 x i32]` into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
_mm_cvtpi32_ps(a, b)
}
/// Converts the lower 4 signed 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let b = _mm_cmpgt_pi8(b, a);
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts the lower 4 unsigned 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let b = _mm_cmpgt_pi16(b, a);
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
let r = _mm_movelh_ps(r, r);
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts a 64-bit vector of `u16`s into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
let r = _mm_movelh_ps(r, r);
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of `[2 x i32]` into a 128-bit vector of `[4 x float]`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
let c = _mm_setzero_ps();
let c = _mm_cvtpi32_ps(c, b);
let c = _mm_movelh_ps(c, c);
_mm_cvtpi32_ps(c, a)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
maskmovq(a, mask, mem_addr)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
_mm_maskmove_si64(a, mask, mem_addr)
}
/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
/// returns it, as specified by the immediate integer operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
macro_rules! call {
($imm2:expr) => {
pextrw(a, $imm2) as i32
};
}
constify_imm2!(imm2, call)
}
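// What the extraction computes, sketched over the raw 64 bits (lane 0 is
// least significant; the selected lane is zero-extended to `i32`):
fn extract_pi16_model(bits: u64, imm2: u32) -> i32 {
    ((bits >> (16 * imm2)) & 0xFFFF) as i32
}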
/// Extracts 16-bit element from a 64-bit vector of `[4 x i16]` and
/// returns it, as specified by the immediate integer operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
macro_rules! call {
($imm2:expr) => {
pextrw(a, $imm2) as i32
};
}
constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `imm2`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => {
pinsrw(a, d, $imm2)
};
}
constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of `[4 x i16]` to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `imm2`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => {
pinsrw(a, d, $imm2)
};
}
constify_imm2!(imm2, call)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create an 8-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
pmovmskb(a)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create an 8-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
_mm_movemask_pi8(a)
}
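// A scalar model of the byte movemask (illustrative only): bit `i` of the
// result is the most significant bit of lane `i`.
fn movemask_pi8_model(a: [u8; 8]) -> i32 {
    a.iter()
        .enumerate()
        .fold(0, |mask, (i, &byte)| mask | (i32::from(byte >> 7) << i))
}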
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
pshufw(a, $imm8)
};
}
constify_imm8!(imm8, call)
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
pshufw(a, $imm8)
};
}
constify_imm8!(imm8, call)
}
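// A model of the shuffle selection (illustrative only): each 2-bit field of
// `imm8` picks the source lane for the corresponding destination lane.
fn shuffle_pi16_model(a: [i16; 4], imm8: u8) -> [i16; 4] {
    let sel = |i: u8| a[usize::from((imm8 >> (2 * i)) & 0b11)];
    [sel(0), sel(1), sel(2), sel(3)]
}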
/// Converts the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
cvttps2pi(a)
}
/// Converts the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
_mm_cvttps_pi32(a)
}
/// Converts the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
cvtps2pi(a)
}
/// Converts the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
_mm_cvtps_pi32(a)
}
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 16-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
let b = _mm_cvtps_pi32(a);
let a = _mm_movehl_ps(a, a);
let c = _mm_cvtps_pi32(a);
_mm_packs_pi32(b, c)
}
/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 8-bit integers, and returns them in the lower 4 elements of the
/// result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
let b = _mm_cvtps_pi16(a);
let c = _mm_setzero_si64();
_mm_packs_pi16(b, c)
}
#[cfg(test)]
mod tests {
use crate::{hint::black_box, mem::transmute};
@@ -3593,24 +3007,6 @@ mod tests {
assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadh_pi() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
let p = x[..].as_ptr();
let r = _mm_loadh_pi(a, p as *const _);
assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 5.0, 6.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_loadl_pi() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let x: [f32; 4] = [5.0, 6.0, 7.0, 8.0];
let p = x[..].as_ptr();
let r = _mm_loadl_pi(a, p as *const _);
assert_eq_m128(r, _mm_setr_ps(5.0, 6.0, 3.0, 4.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_load_ss() {
let a = 42.0f32;
@@ -3684,28 +3080,6 @@ mod tests {
assert_eq_m128i(r, _mm_set_epi64x(5, 0));
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storeh_pi() {
let mut vals = [0.0f32; 8];
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
_mm_storeh_pi(vals.as_mut_ptr() as *mut _, a);
assert_eq!(vals[0], 3.0);
assert_eq!(vals[1], 4.0);
assert_eq!(vals[2], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_storel_pi() {
let mut vals = [0.0f32; 8];
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
_mm_storel_pi(vals.as_mut_ptr() as *mut _, a);
assert_eq!(vals[0], 1.0);
assert_eq!(vals[1], 2.0);
assert_eq!(vals[2], 0.0);
}
#[simd_test(enable = "sse")]
unsafe fn test_mm_store_ss() {
let mut vals = [0.0f32; 8];
@@ -3926,254 +3300,4 @@ mod tests {
assert_eq!(mem.data[i], get_m128(a, i));
}
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_stream_pi() {
let a = transmute(i8x8::new(0, 0, 0, 0, 0, 0, 0, 7));
let mut mem = boxed::Box::<__m64>::new(transmute(i8x8::splat(1)));
_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
assert_eq_m64(a, *mem);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_max_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pi16(a, b));
assert_eq_m64(r, _m_pmaxsw(a, b));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_max_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pu8(a, b));
assert_eq_m64(r, _m_pmaxub(a, b));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_min_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(-1, -2, -3, -4);
assert_eq_m64(r, _mm_min_pi16(a, b));
assert_eq_m64(r, _m_pminsw(a, b));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_min_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
assert_eq_m64(r, _mm_min_pu8(a, b));
assert_eq_m64(r, _m_pminub(a, b));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_mulhi_pu16() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _mm_mulhi_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_mullo_pi16() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _mm_mullo_pi16(a, b);
assert_eq_m64(r, _mm_set1_pi16(17960));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_m_pmulhuw() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _m_pmulhuw(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_avg_pu8() {
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
let r = _mm_avg_pu8(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
let r = _m_pavgb(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_avg_pu16() {
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
let r = _mm_avg_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
let r = _m_pavgw(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_sad_pu8() {
#[rustfmt::skip]
let a = _mm_setr_pi8(
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4,
);
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
let r = _mm_sad_pu8(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
let r = _m_psadbw(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi32_ps() {
let a = _mm_setr_ps(0., 0., 3., 4.);
let b = _mm_setr_pi32(1, 2);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32_ps(a, b);
assert_eq_m128(r, expected);
let r = _mm_cvt_pi2ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpu16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpu8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtpi32x2_ps() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32x2_ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_maskmove_si64() {
let a = _mm_set1_pi8(9);
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
let mut r = _mm_set1_pi8(0);
_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
assert_eq_m64(r, e);
let mut r = _mm_set1_pi8(0);
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
assert_eq_m64(r, e);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_extract_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_extract_pi16(a, 0);
assert_eq!(r, 1);
let r = _mm_extract_pi16(a, 1);
assert_eq!(r, 2);
let r = _m_pextrw(a, 1);
assert_eq!(r, 2);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_insert_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_insert_pi16(a, 0, 0b0);
let expected = _mm_setr_pi16(0, 2, 3, 4);
assert_eq_m64(r, expected);
let r = _mm_insert_pi16(a, 0, 0b10);
let expected = _mm_setr_pi16(1, 2, 0, 4);
assert_eq_m64(r, expected);
let r = _m_pinsrw(a, 0, 0b10);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_movemask_pi8() {
let a = _mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001);
let r = _m_pmovmskb(a);
assert_eq!(r, 0b10001);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_shuffle_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
let expected = _mm_setr_pi16(4, 2, 2, 1);
assert_eq_m64(r, expected);
let r = _m_pshufw(a, 0b00_01_01_11);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi32() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(1, 2);
assert_eq_m64(r, _mm_cvtps_pi32(a));
assert_eq_m64(r, _mm_cvt_ps2pi(a));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvttps_pi32() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(7, 2);
assert_eq_m64(r, _mm_cvttps_pi32(a));
assert_eq_m64(r, _mm_cvtt_ps2pi(a));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi16() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi16(7, 2, 3, 4);
assert_eq_m64(r, _mm_cvtps_pi16(a));
}
#[simd_test(enable = "sse,mmx")]
unsafe fn test_mm_cvtps_pi8() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq_m64(r, _mm_cvtps_pi8(a));
}
}

View file

@@ -2958,113 +2958,6 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
simd_shuffle2(a, b, [0, 2])
}
/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
paddq(a, b)
}
/// Multiplies 32-bit unsigned integer values contained in the lower bits
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
/// product.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
pmuludq2(a, b)
}
/// Subtracts signed or unsigned 64-bit integer values and writes the
/// difference to the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
psubq(a, b)
}
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
/// `[2 x i32]` into two double-precision floating-point values, returned in a
/// 128-bit vector of `[2 x double]`.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
cvtpi2pd(a)
}
/// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with
/// the specified 64-bit integer values.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(transmute(e1), transmute(e0))
}
/// Initializes both values in a 128-bit vector of `[2 x i64]` with the
/// specified 64-bit value.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(transmute(a), transmute(a))
}
/// Constructs a 128-bit integer vector, initialized in reverse order
/// with the specified 64-bit integral values.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(transmute(e0), transmute(e1))
}
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
/// integer.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
}
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
/// upper bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(0, transmute(a))
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
/// returned in a 64-bit vector of `[2 x i32]`.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
cvtpd2pi(a)
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
/// returned in a 64-bit vector of `[2 x i32]`.
/// If the result of either conversion is inexact, the result is truncated
/// (rounded towards zero) regardless of the current MXCSR setting.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
cvttpd2pi(a)
}
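// Truncation is toward zero (illustrative): a lane of -1.7 converts to -1,
// not -2, and NaN or out-of-range lanes produce the integer indefinite
// value i32::MIN (0x8000_0000) regardless of MXCSR.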
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse2.pause"]
@@ -3207,18 +3100,6 @@ extern "C" {
fn storeudq(mem_addr: *mut i8, a: __m128i);
#[link_name = "llvm.x86.sse2.storeu.pd"]
fn storeupd(mem_addr: *mut i8, a: __m128d);
#[link_name = "llvm.x86.mmx.padd.q"]
fn paddq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmulu.dq"]
fn pmuludq2(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.q"]
fn psubq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtpi2pd"]
fn cvtpi2pd(a: __m64) -> __m128d;
#[link_name = "llvm.x86.sse.cvtpd2pi"]
fn cvtpd2pi(a: __m128d) -> __m64;
#[link_name = "llvm.x86.sse.cvttpd2pi"]
fn cvttpd2pi(a: __m128d) -> __m64;
}
#[cfg(test)]
@@ -5208,87 +5089,4 @@ mod tests {
let r = _mm_castsi128_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_add_si64() {
let a = 1i64;
let b = 2i64;
let expected = 3i64;
let r = _mm_add_si64(transmute(a), transmute(b));
assert_eq!(transmute::<__m64, i64>(r), expected);
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_mul_su32() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = 3u64;
let r = _mm_mul_su32(a, b);
assert_eq_m64(r, transmute(expected));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_sub_si64() {
let a = 1i64;
let b = 2i64;
let expected = -1i64;
let r = _mm_sub_si64(transmute(a), transmute(b));
assert_eq!(transmute::<__m64, i64>(r), expected);
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_cvtpi32_pd() {
let a = _mm_setr_pi32(1, 2);
let expected = _mm_setr_pd(1., 2.);
let r = _mm_cvtpi32_pd(a);
assert_eq_m128d(r, expected);
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_set_epi64() {
let r = _mm_set_epi64(transmute(1i64), transmute(2i64));
assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_set1_epi64() {
let r = _mm_set1_epi64(transmute(1i64));
assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_setr_epi64() {
let r = _mm_setr_epi64(transmute(1i64), transmute(2i64));
assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_movepi64_pi64() {
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_movpi64_epi64() {
let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_cvtpd_pi32() {
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvtpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(5, 0));
}
#[simd_test(enable = "sse2,mmx")]
unsafe fn test_mm_cvttpd_pi32() {
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvttpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(5, 0));
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
let r = _mm_cvttpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
}
}


@@ -299,169 +299,6 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}
/// Computes the absolute value of packed 8-bit integers in `a`, and returns the
/// unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
pabsb(a)
}
/// Computes the absolute value of packed 8-bit integers in `a`, and returns the
/// unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
pabsw(a)
}
/// Computes the absolute value of packed 32-bit integers in `a`, and returns the
/// unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
pabsd(a)
}
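// One edge case worth noting (illustrative): the absolute values do not
// saturate, so _mm_abs_pi8(_mm_set1_pi8(i8::MIN)) yields lanes of 128
// (0x80), which read back as -128 when reinterpreted as signed bytes.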
/// Shuffles packed 8-bit integers in `a` according to the shuffle control mask
/// in the corresponding 8-bit element of `b`, and returns the results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
pshufb(a, b)
}
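// A per-lane sketch of the control-mask semantics (illustrative):
// r[i] = if (b[i] & 0x80) != 0 { 0 } else { a[(b[i] & 0x07) as usize] };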
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => {
palignrb(a, b, $imm8)
};
}
constify_imm8!(n, call)
}
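// Bit-level sketch (illustrative), treating the concatenation a:b as one
// 128-bit value with `a` in the high half:
// r = low 64 bits of ((a:b) >> (8 * n)), zeros shifting in from the left,
// so byte counts n >= 16 yield all zeros.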
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
phaddw(a, b)
}
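// Lane-order sketch shared by the horizontal ops below (illustrative):
// r = [a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]]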
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
phaddd(a, b)
}
/// Horizontally adds the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
phaddsw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
phsubw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
phsubd(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
phsubsw(a, b)
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
pmaddubsw(a, b)
}
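// Per-pair sketch (illustrative), reading `a` as unsigned and `b` as
// signed bytes, with saturate_i16 a hypothetical clamp to i16's range:
// r[i] = saturate_i16((a[2*i] as i32) * (b[2*i] as i32)
//                   + (a[2*i + 1] as i32) * (b[2*i + 1] as i32))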
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
pmulhrsw(a, b)
}
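// A scalar model of the rounding shift described above (a sketch, assuming
// i32 intermediates per lane):
// let t = ((a[i] as i32) * (b[i] as i32)) >> 14;
// r[i] = ((t + 1) >> 1) as i16;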
/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
psignb(a, b)
}
/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
psignw(a, b)
}
/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and returns the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
psignd(a, b)
}
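// Per-lane sketch shared by the psign family above (illustrative):
// r[i] = if b[i] < 0 { a[i].wrapping_neg() }
//        else if b[i] == 0 { 0 }
//        else { a[i] };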
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
@@ -508,54 +345,6 @@ extern "C" {
#[link_name = "llvm.x86.ssse3.psign.d.128"]
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.ssse3.pabs.b"]
fn pabsb(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pabs.w"]
fn pabsw(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pabs.d"]
fn pabsd(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pshuf.b"]
fn pshufb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.palignr.b"]
fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.w"]
fn phaddw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.d"]
fn phaddd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.sw"]
fn phaddsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.w"]
fn phsubw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.d"]
fn phsubd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.sw"]
fn phsubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
fn pmaddubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
fn pmulhrsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.b"]
fn psignb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.w"]
fn psignw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.d"]
fn psignd(a: __m64, b: __m64) -> __m64;
}
#[cfg(test)]
@@ -761,138 +550,4 @@ mod tests {
let r = _mm_sign_epi32(a, b);
assert_eq_m128i(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_abs_pi8() {
let r = _mm_abs_pi8(_mm_set1_pi8(-5));
assert_eq_m64(r, _mm_set1_pi8(5));
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_abs_pi16() {
let r = _mm_abs_pi16(_mm_set1_pi16(-5));
assert_eq_m64(r, _mm_set1_pi16(5));
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_abs_pi32() {
let r = _mm_abs_pi32(_mm_set1_pi32(-5));
assert_eq_m64(r, _mm_set1_pi32(5));
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_shuffle_pi8() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
let r = _mm_shuffle_pi8(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_alignr_pi8() {
let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
let r = _mm_alignr_pi8(a, b, 4);
assert_eq_m64(r, transmute(0x89abcdefffddeecc_u64));
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hadd_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let b = _mm_setr_pi16(4, 128, 4, 3);
let expected = _mm_setr_pi16(3, 7, 132, 7);
let r = _mm_hadd_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hadd_pi32() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(4, 128);
let expected = _mm_setr_pi32(3, 132);
let r = _mm_hadd_pi32(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hadds_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let b = _mm_setr_pi16(32767, 1, -32768, -1);
let expected = _mm_setr_pi16(3, 7, 32767, -32768);
let r = _mm_hadds_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hsub_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let b = _mm_setr_pi16(4, 128, 4, 3);
let expected = _mm_setr_pi16(-1, -1, -124, 1);
let r = _mm_hsub_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hsub_pi32() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(4, 128);
let expected = _mm_setr_pi32(-1, -124);
let r = _mm_hsub_pi32(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_hsubs_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let b = _mm_setr_pi16(4, 128, 4, 3);
let expected = _mm_setr_pi16(-1, -1, -124, 1);
let r = _mm_hsubs_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_maddubs_pi16() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
let expected = _mm_setr_pi16(130, 24, 192, 194);
let r = _mm_maddubs_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_mulhrs_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let b = _mm_setr_pi16(4, 32767, -1, -32768);
let expected = _mm_setr_pi16(0, 2, 0, -4);
let r = _mm_mulhrs_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_sign_pi8() {
let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
let r = _mm_sign_pi8(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_sign_pi16() {
let a = _mm_setr_pi16(-1, 2, 3, 4);
let b = _mm_setr_pi16(1, -1, 1, 0);
let expected = _mm_setr_pi16(-1, -2, 3, 0);
let r = _mm_sign_pi16(a, b);
assert_eq_m64(r, expected);
}
#[simd_test(enable = "ssse3,mmx")]
unsafe fn test_mm_sign_pi32() {
let a = _mm_setr_pi32(-1, 2);
let b = _mm_setr_pi32(1, 0);
let expected = _mm_setr_pi32(-1, 0);
let r = _mm_sign_pi32(a, b);
assert_eq_m64(r, expected);
}
}


@@ -2,15 +2,6 @@
use crate::core_arch::x86::*;
#[target_feature(enable = "mmx")]
pub unsafe fn assert_eq_m64(a: __m64, b: __m64) {
union A {
a: __m64,
b: u64,
}
assert_eq!(A { a }.b, A { a: b }.b)
}
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
union A {


@@ -44,8 +44,6 @@ pub fn simd_test(
.map(String::from)
.collect();
let mmx = target_features.iter().any(|s| s.starts_with("mmx"));
let enable_feature = string(enable_feature);
let item = TokenStream::from(item);
let name = find_name(item.clone());
@@ -106,15 +104,6 @@ pub fn simd_test(
TokenStream::new()
};
let emms = if mmx {
// note: if the test requires MMX, we need to clear the FPU
// registers once the test finishes before interfacing with
// other x87 code:
quote! { unsafe { super::_mm_empty() }; }
} else {
TokenStream::new()
};
let ret: TokenStream = quote_spanned! {
proc_macro2::Span::call_site() =>
#[allow(non_snake_case)]
@@ -123,7 +112,6 @@ pub fn simd_test(
fn #name() {
if #force_test | (#cfg_target_features) {
let v = unsafe { #name() };
#emms
return v;
} else {
::stdarch_test::assert_skip_test_ok(stringify!(#name));