Reorganize the x86/x86_64 intrinsic folders (#334)

The public API isn't changing in this commit, but the internal organization is
being reorganized. Instead of `x86/$subtarget/$feature.rs`, the folders are
changed to `coresimd/x86/$feature.rs` and `coresimd/x86_64/$feature.rs`. The
`arch::x86_64` module then re-exports both the contents of the `x86` module and
the `x86_64` module.
This commit is contained in:
Alex Crichton 2018-02-27 08:41:07 -06:00 committed by GitHub
parent aa4cef7723
commit 217f89bc4f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
49 changed files with 1690 additions and 1807 deletions

View file

@ -48,6 +48,7 @@ pub mod arch {
#[cfg(target_arch = "x86_64")]
pub mod x86_64 {
pub use coresimd::x86::*;
pub use coresimd::x86_64::*;
}
/// Platform-specific intrinsics for the `arm` platform.
@ -116,6 +117,8 @@ mod v16 {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
mod x86;
#[cfg(target_arch = "x86_64")]
mod x86_64;
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
mod arm;

View file

@ -42,15 +42,15 @@ pub unsafe fn _popcnt32(x: i32) -> i32 {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::i586::abm;
use coresimd::x86::*;
#[simd_test = "lzcnt"]
unsafe fn _lzcnt_u32() {
assert_eq!(abm::_lzcnt_u32(0b0101_1010), 25);
unsafe fn test_lzcnt_u32() {
assert_eq!(_lzcnt_u32(0b0101_1010), 25);
}
#[simd_test = "popcnt"]
unsafe fn _popcnt32() {
assert_eq!(abm::_popcnt32(0b0101_1010), 4);
unsafe fn test_popcnt32() {
assert_eq!(_popcnt32(0b0101_1010), 4);
}
}

View file

@ -96,59 +96,59 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::i586::bmi;
use coresimd::x86::*;
#[simd_test = "bmi"]
unsafe fn _bextr_u32() {
let r = bmi::_bextr_u32(0b0101_0000u32, 4, 4);
unsafe fn test_bextr_u32() {
let r = _bextr_u32(0b0101_0000u32, 4, 4);
assert_eq!(r, 0b0000_0101u32);
}
#[simd_test = "bmi"]
unsafe fn _andn_u32() {
assert_eq!(bmi::_andn_u32(0, 0), 0);
assert_eq!(bmi::_andn_u32(0, 1), 1);
assert_eq!(bmi::_andn_u32(1, 0), 0);
assert_eq!(bmi::_andn_u32(1, 1), 0);
unsafe fn test_andn_u32() {
assert_eq!(_andn_u32(0, 0), 0);
assert_eq!(_andn_u32(0, 1), 1);
assert_eq!(_andn_u32(1, 0), 0);
assert_eq!(_andn_u32(1, 1), 0);
let r = bmi::_andn_u32(0b0000_0000u32, 0b0000_0000u32);
let r = _andn_u32(0b0000_0000u32, 0b0000_0000u32);
assert_eq!(r, 0b0000_0000u32);
let r = bmi::_andn_u32(0b0000_0000u32, 0b1111_1111u32);
let r = _andn_u32(0b0000_0000u32, 0b1111_1111u32);
assert_eq!(r, 0b1111_1111u32);
let r = bmi::_andn_u32(0b1111_1111u32, 0b0000_0000u32);
let r = _andn_u32(0b1111_1111u32, 0b0000_0000u32);
assert_eq!(r, 0b0000_0000u32);
let r = bmi::_andn_u32(0b1111_1111u32, 0b1111_1111u32);
let r = _andn_u32(0b1111_1111u32, 0b1111_1111u32);
assert_eq!(r, 0b0000_0000u32);
let r = bmi::_andn_u32(0b0100_0000u32, 0b0101_1101u32);
let r = _andn_u32(0b0100_0000u32, 0b0101_1101u32);
assert_eq!(r, 0b0001_1101u32);
}
#[simd_test = "bmi"]
unsafe fn _blsi_u32() {
assert_eq!(bmi::_blsi_u32(0b1101_0000u32), 0b0001_0000u32);
unsafe fn test_blsi_u32() {
assert_eq!(_blsi_u32(0b1101_0000u32), 0b0001_0000u32);
}
#[simd_test = "bmi"]
unsafe fn _blsmsk_u32() {
let r = bmi::_blsmsk_u32(0b0011_0000u32);
unsafe fn test_blsmsk_u32() {
let r = _blsmsk_u32(0b0011_0000u32);
assert_eq!(r, 0b0001_1111u32);
}
#[simd_test = "bmi"]
unsafe fn _blsr_u32() {
unsafe fn test_blsr_u32() {
// TODO: test the behavior when the input is 0
let r = bmi::_blsr_u32(0b0011_0000u32);
let r = _blsr_u32(0b0011_0000u32);
assert_eq!(r, 0b0010_0000u32);
}
#[simd_test = "bmi"]
unsafe fn _tzcnt_u32() {
assert_eq!(bmi::_tzcnt_u32(0b0000_0001u32), 0u32);
assert_eq!(bmi::_tzcnt_u32(0b0000_0000u32), 32u32);
assert_eq!(bmi::_tzcnt_u32(0b1001_0000u32), 4u32);
unsafe fn test_tzcnt_u32() {
assert_eq!(_tzcnt_u32(0b0000_0001u32), 0u32);
assert_eq!(_tzcnt_u32(0b0000_0000u32), 32u32);
assert_eq!(_tzcnt_u32(0b1001_0000u32), 4u32);
}
}

View file

@ -67,10 +67,10 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::i586::bmi2;
use coresimd::x86::*;
#[simd_test = "bmi2"]
unsafe fn _pext_u32() {
unsafe fn test_pext_u32() {
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
@ -79,12 +79,12 @@ mod tests {
let m1 = 0b1110_1011_1110_1111u32;
let s1 = 0b0001_0111_0100_0011u32;
assert_eq!(bmi2::_pext_u32(n, m0), s0);
assert_eq!(bmi2::_pext_u32(n, m1), s1);
assert_eq!(_pext_u32(n, m0), s0);
assert_eq!(_pext_u32(n, m1), s1);
}
#[simd_test = "bmi2"]
unsafe fn _pdep_u32() {
unsafe fn test_pdep_u32() {
let n = 0b1011_1110_1001_0011u32;
let m0 = 0b0110_0011_1000_0101u32;
@ -93,23 +93,23 @@ mod tests {
let m1 = 0b1110_1011_1110_1111u32;
let s1 = 0b1110_1001_0010_0011u32;
assert_eq!(bmi2::_pdep_u32(n, m0), s0);
assert_eq!(bmi2::_pdep_u32(n, m1), s1);
assert_eq!(_pdep_u32(n, m0), s0);
assert_eq!(_pdep_u32(n, m1), s1);
}
#[simd_test = "bmi2"]
unsafe fn _bzhi_u32() {
unsafe fn test_bzhi_u32() {
let n = 0b1111_0010u32;
let s = 0b0001_0010u32;
assert_eq!(bmi2::_bzhi_u32(n, 5), s);
assert_eq!(_bzhi_u32(n, 5), s);
}
#[simd_test = "bmi2"]
unsafe fn _mulx_u32() {
unsafe fn test_mulx_u32() {
let a: u32 = 4_294_967_200;
let b: u32 = 2;
let mut hi = 0;
let lo = bmi2::_mulx_u32(a, b, &mut hi);
let lo = _mulx_u32(a, b, &mut hi);
/*
result = 8589934400
= 0b0001_1111_1111_1111_1111_1111_1111_0100_0000u64

View file

@ -79,7 +79,7 @@ pub fn has_cpuid() -> bool {
}
#[cfg(target_arch = "x86")]
{
use coresimd::x86::i386::{__readeflags, __writeeflags};
use coresimd::x86::{__readeflags, __writeeflags};
// On `x86` the `cpuid` instruction is not always available.
// This follows the approach indicated in:
@ -121,7 +121,7 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
#[cfg(test)]
mod tests {
use coresimd::x86::i586::cpuid;
use coresimd::x86::*;
#[test]
fn test_always_has_cpuid() {
@ -133,7 +133,6 @@ mod tests {
#[cfg(target_arch = "x86")]
#[test]
fn test_has_cpuid() {
use coresimd::x86::i386::__readeflags;
unsafe {
let before = __readeflags();

View file

@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {
#[cfg(test)]
mod tests {
use coresimd::x86::i386::*;
use coresimd::x86::*;
#[test]
fn test_eflags() {

View file

@ -1,15 +0,0 @@
//! `i386` intrinsics
mod eflags;
pub use self::eflags::*;
#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
mod fxsr;
#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
pub use self::fxsr::*;
mod bswap;
pub use self::bswap::*;
mod rdtsc;
pub use self::rdtsc::*;

View file

@ -1,39 +0,0 @@
//! `i586` intrinsics
pub use self::cpuid::*;
pub use self::xsave::*;
pub use self::sse::*;
pub use self::sse2::*;
pub use self::sse3::*;
pub use self::ssse3::*;
pub use self::sse41::*;
pub use self::sse42::*;
pub use self::avx::*;
pub use self::avx2::*;
pub use self::abm::*;
pub use self::bmi::*;
pub use self::bmi2::*;
#[cfg(not(feature = "intel_sde"))]
pub use self::tbm::*;
mod cpuid;
mod xsave;
mod sse;
mod sse2;
mod sse3;
mod ssse3;
mod sse41;
mod sse42;
mod avx;
mod avx2;
mod abm;
mod bmi;
mod bmi2;
#[cfg(not(feature = "intel_sde"))]
mod tbm;

View file

@ -1,33 +0,0 @@
//! `i686` intrinsics
mod aes;
pub use self::aes::*;
mod rdrand;
pub use self::rdrand::*;
mod mmx;
pub use self::mmx::*;
mod pclmulqdq;
pub use self::pclmulqdq::*;
mod sse;
pub use self::sse::*;
mod sse2;
pub use self::sse2::*;
mod ssse3;
pub use self::ssse3::*;
mod sse41;
pub use self::sse41::*;
mod sse42;
pub use self::sse42::*;
#[cfg(not(feature = "intel_sde"))]
mod sse4a;
#[cfg(not(feature = "intel_sde"))]
pub use self::sse4a::*;

View file

@ -1,710 +0,0 @@
//! `i686` Streaming SIMD Extensions (SSE)
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse.cvtpi2ps"]
fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
#[link_name = "llvm.x86.mmx.maskmovq"]
fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
#[link_name = "llvm.x86.mmx.pextr.w"]
fn pextrw(a: __m64, imm8: i32) -> i32;
#[link_name = "llvm.x86.mmx.pinsr.w"]
fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
#[link_name = "llvm.x86.mmx.pmovmskb"]
fn pmovmskb(a: __m64) -> i32;
#[link_name = "llvm.x86.sse.pshuf.w"]
fn pshufw(a: __m64, imm8: i8) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxs.w"]
fn pmaxsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxu.b"]
fn pmaxub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmins.w"]
fn pminsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pminu.b"]
fn pminub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmulhu.w"]
fn pmulhuw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.b"]
fn pavgb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.w"]
fn pavgw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psad.bw"]
fn psadbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtps2pi"]
fn cvtps2pi(a: __m128) -> __m64;
#[link_name = "llvm.x86.sse.cvttps2pi"]
fn cvttps2pi(a: __m128) -> __m64;
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
///
/// Operates lane-wise on the four 16-bit lanes of each `__m64` and lowers
/// to a single `pmaxsw` instruction (requires both `sse` and `mmx`).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
pmaxsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
_mm_max_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
///
/// NOTE(review): the original comment said "signed", but `pmaxub` computes
/// an *unsigned* byte maximum, matching the `pu8` suffix.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
pmaxub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
///
/// Instruction-mnemonic alias for [`_mm_max_pu8`]; the original comment
/// said "signed", but `pmaxub` is an unsigned byte maximum.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
_mm_max_pu8(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
pminsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
_mm_min_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
///
/// NOTE(review): the original comment said "signed", but `pminub` computes
/// an *unsigned* byte minimum, matching the `pu8` suffix.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
pminub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
///
/// Instruction-mnemonic alias for [`_mm_min_pu8`]; the original comment
/// said "signed", but `pminub` is an unsigned byte minimum.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
_mm_min_pu8(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
///
/// Each 16x16 -> 32-bit unsigned product keeps only its upper half
/// (`pmulhuw`).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
pmulhuw(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
_mm_mulhi_pu16(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
pavgb(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu8(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
pavgw(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
_mm_avg_pu16(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value for each of the
/// difference. Then the sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
///
/// Single `psadbw` (sum of absolute differences) instruction.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
psadbw(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value for each of the
/// difference. Then sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
_mm_sad_pu8(a, b)
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
cvtpi2ps(a, b)
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
_mm_cvtpi32_ps(a, b)
}
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
///
/// The bytes are treated as *signed*: `0 > a` produces an all-ones lane
/// wherever a byte is negative, and interleaving `a` with that mask
/// sign-extends each byte to 16 bits before the i16 -> f32 conversion.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
// Build a per-lane sign mask (all-ones where a lane of `a` is negative).
let b = _mm_setzero_si64();
let b = _mm_cmpgt_pi8(b, a);
// Interleave low bytes of `a` with the mask: sign-extend i8 -> i16.
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts the lower 4 8-bit values of `a` into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let b = _mm_unpacklo_pi8(a, b);
_mm_cvtpi16_ps(b)
}
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
///
/// `cvtpi2ps` only converts two 32-bit lanes at a time, so the four 16-bit
/// lanes are sign-extended to i32 in two halves and converted in two steps:
/// high pair first, then shuffled into the upper half of the result while
/// the low pair fills the lower half.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
// Sign mask: all-ones in lanes where `a` is negative.
let b = _mm_setzero_si64();
let b = _mm_cmpgt_pi16(b, a);
// Sign-extend the upper two i16 lanes to i32 and convert them.
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
// Duplicate the converted pair into the high half of the result.
let r = _mm_movelh_ps(r, r);
// Sign-extend and convert the lower two lanes into the low half.
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
let b = _mm_setzero_si64();
let c = _mm_unpackhi_pi16(a, b);
let r = _mm_setzero_ps();
let r = cvtpi2ps(r, c);
let r = _mm_movelh_ps(r, r);
let c = _mm_unpacklo_pi16(a, b);
cvtpi2ps(r, c)
}
/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
let c = i586::_mm_setzero_ps();
let c = _mm_cvtpi32_ps(c, b);
let c = i586::_mm_movelh_ps(c, c);
_mm_cvtpi32_ps(c, a)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
maskmovq(a, mask, mem_addr)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
_mm_maskmove_si64(a, mask, mem_addr)
}
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
///
/// `imm2` must be a compile-time constant in `0..=3` (enforced by
/// `#[rustc_args_required_const]`); `constify_imm2!` expands the call for
/// every possible value so the immediate can be encoded in the `pextrw`
/// instruction itself.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
macro_rules! call {
($imm2:expr) => { pextrw(a, $imm2) as i32 }
}
constify_imm2!(imm2, call)
}
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
macro_rules! call {
($imm2:expr) => { pextrw(a, $imm2) as i32 }
}
constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `n`.
///
/// `imm2` must be a compile-time constant in `0..=3`; `constify_imm2!`
/// monomorphizes the call so `pinsrw` gets a literal immediate.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => { pinsrw(a, d, $imm2) }
}
constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `n`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
macro_rules! call {
($imm2:expr) => { pinsrw(a, d, $imm2) }
}
constify_imm2!(imm2, call)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
pmovmskb(a)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
_mm_movemask_pi8(a)
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
///
/// Each 2-bit field of `imm8` selects the source lane for one destination
/// lane; `constify_imm8!` expands all 256 possible immediates so `pshufw`
/// receives a literal operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => { pshufw(a, $imm8) }
}
constify_imm8!(imm8, call)
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
macro_rules! call {
($imm8:expr) => { pshufw(a, $imm8) }
}
constify_imm8!(imm8, call)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
cvttps2pi(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
_mm_cvttps_pi32(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
cvtps2pi(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
_mm_cvtps_pi32(a)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 16-bit integers.
///
/// `cvtps2pi` only yields two i32 lanes per call, so the low and high float
/// pairs are converted separately and re-packed (with saturation, via
/// `_mm_packs_pi32`) into four i16 lanes.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
// Convert the low two floats.
let b = _mm_cvtps_pi32(a);
// Move the high two floats into the low half and convert them too.
let a = _mm_movehl_ps(a, a);
let c = _mm_cvtps_pi32(a);
_mm_packs_pi32(b, c)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 8-bit integers, and returns them in the lower 4 elements of the
/// result.
///
/// Converts to i16 first, then packs against zero so the upper four byte
/// lanes of the result are cleared.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
let b = _mm_cvtps_pi16(a);
let c = _mm_setzero_si64();
_mm_packs_pi16(b, c)
}
#[cfg(test)]
mod tests {
use coresimd::x86::*;
use stdsimd_test::simd_test;
#[simd_test = "sse,mmx"]
unsafe fn test_mm_max_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pi16(a, b));
assert_eq_m64(r, _m_pmaxsw(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_max_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pu8(a, b));
assert_eq_m64(r, _m_pmaxub(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_min_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(-1, -2, -3, -4);
assert_eq_m64(r, _mm_min_pi16(a, b));
assert_eq_m64(r, _m_pminsw(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_min_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
assert_eq_m64(r, _mm_min_pu8(a, b));
assert_eq_m64(r, _m_pminub(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_mulhi_pu16() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _mm_mulhi_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test = "sse,mmx"]
unsafe fn test_m_pmulhuw() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _m_pmulhuw(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_avg_pu8() {
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
let r = _mm_avg_pu8(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
let r = _m_pavgb(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_avg_pu16() {
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
let r = _mm_avg_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
let r = _m_pavgw(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_sad_pu8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = _mm_setr_pi8(
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4,
);
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
let r = _mm_sad_pu8(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
let r = _m_psadbw(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi32_ps() {
let a = _mm_setr_ps(0., 0., 3., 4.);
let b = _mm_setr_pi32(1, 2);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32_ps(a, b);
assert_eq_m128(r, expected);
let r = _mm_cvt_pi2ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpu16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpu8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi32x2_ps() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32x2_ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_maskmove_si64() {
let a = _mm_set1_pi8(9);
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
let mut r = _mm_set1_pi8(0);
_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
assert_eq_m64(r, e);
let mut r = _mm_set1_pi8(0);
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
assert_eq_m64(r, e);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_extract_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_extract_pi16(a, 0);
assert_eq!(r, 1);
let r = _mm_extract_pi16(a, 1);
assert_eq!(r, 2);
let r = _m_pextrw(a, 1);
assert_eq!(r, 2);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_insert_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_insert_pi16(a, 0, 0b0);
let expected = _mm_setr_pi16(0, 2, 3, 4);
assert_eq_m64(r, expected);
let r = _mm_insert_pi16(a, 0, 0b10);
let expected = _mm_setr_pi16(1, 2, 0, 4);
assert_eq_m64(r, expected);
let r = _m_pinsrw(a, 0, 0b10);
assert_eq_m64(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_movemask_pi8() {
let a =
_mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001);
let r = _m_pmovmskb(a);
assert_eq!(r, 0b10001);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_shuffle_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
let expected = _mm_setr_pi16(4, 2, 2, 1);
assert_eq_m64(r, expected);
let r = _m_pshufw(a, 0b00_01_01_11);
assert_eq_m64(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi32() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(1, 2);
assert_eq_m64(r, _mm_cvtps_pi32(a));
assert_eq_m64(r, _mm_cvt_ps2pi(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvttps_pi32() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(7, 2);
assert_eq_m64(r, _mm_cvttps_pi32(a));
assert_eq_m64(r, _mm_cvtt_ps2pi(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi16() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi16(7, 2, 3, 4);
assert_eq_m64(r, _mm_cvtps_pi16(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi8() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq_m64(r, _mm_cvtps_pi8(a));
}
}

View file

@ -1,225 +0,0 @@
//! `i686`'s Streaming SIMD Extensions 2 (SSE2)
use coresimd::simd_llvm::simd_extract;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum.
///
/// Wrapping addition via the MMX `paddq` instruction; overflow is silently
/// truncated to 64 bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
paddq(a, b)
}
/// Multiplies 32-bit unsigned integer values contained in the lower bits
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
/// product.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
pmuludq(a, b)
}
/// Subtracts signed or unsigned 64-bit integer values and writes the
/// difference to the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
psubq(a, b)
}
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
/// [2 x i32] into two double-precision floating-point values, returned in a
/// 128-bit vector of [2 x double].
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
cvtpi2pd(a)
}
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
/// the specified 64-bit integer values.
///
/// `e1` becomes the upper element and `e0` the lower one, per the
/// `_mm_set_epi64x(high, low)` argument order.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(e1), mem::transmute(e0))
}
/// Initializes both values in a 128-bit vector of [2 x i64] with the
/// specified 64-bit value.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(a), mem::transmute(a))
}
/// Constructs a 128-bit integer vector, initialized in reverse order
/// with the specified 64-bit integral values.
///
/// The "reverse" (memory) order is achieved by passing the arguments to
/// `_mm_set_epi64x` swapped: `e0` ends up in the high element.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
_mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
}
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
/// integer.
///
/// Extracts lane 0 of the `[2 x i64]` view and reinterprets it as `__m64`.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
mem::transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
}
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
/// upper bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
_mm_set_epi64x(0, mem::transmute(a))
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
///
/// Rounds according to the current rounding mode; contrast
/// `_mm_cvttpd_pi32`, which always truncates.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
    cvtpd2pi(a)
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
/// If the result of either conversion is inexact, the result is truncated
/// (rounded towards zero) regardless of the current MXCSR setting.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
    // Thin wrapper over the `llvm.x86.sse.cvttpd2pi` intrinsic declared below.
    cvttpd2pi(a)
}
// LLVM intrinsic declarations backing the wrapper functions above.
// `improper_ctypes` is allowed because SIMD vector types are not FFI-safe
// in the usual sense, but LLVM understands these link names directly.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.mmx.padd.q"]
    fn paddq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmulu.dq"]
    fn pmuludq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psub.q"]
    fn psubq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.sse.cvtpi2pd"]
    fn cvtpi2pd(a: __m64) -> __m128d;
    #[link_name = "llvm.x86.sse.cvtpd2pi"]
    fn cvtpd2pi(a: __m128d) -> __m64;
    #[link_name = "llvm.x86.sse.cvttpd2pi"]
    fn cvttpd2pi(a: __m128d) -> __m64;
}
// Unit tests for the `sse2,mmx` wrappers above. Each test is gated on
// runtime CPU feature support by the `simd_test` harness.
#[cfg(test)]
mod tests {
    use std::mem;
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_add_si64() {
        let a = 1i64;
        let b = 2i64;
        let expected = 3i64;
        let r = _mm_add_si64(mem::transmute(a), mem::transmute(b));
        assert_eq!(mem::transmute::<__m64, i64>(r), expected);
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_mul_su32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(3, 4);
        // Only the low 32-bit lanes (1 * 3) participate in the product.
        let expected = 3u64;
        let r = _mm_mul_su32(a, b);
        assert_eq_m64(r, mem::transmute(expected));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_sub_si64() {
        let a = 1i64;
        let b = 2i64;
        let expected = -1i64;
        let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b));
        assert_eq!(mem::transmute::<__m64, i64>(r), expected);
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_cvtpi32_pd() {
        let a = _mm_setr_pi32(1, 2);
        let expected = _mm_setr_pd(1., 2.);
        let r = _mm_cvtpi32_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_set_epi64() {
        // `set` takes arguments high-to-low, `setr` low-to-high.
        let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
        assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_set1_epi64() {
        let r = _mm_set1_epi64(mem::transmute(1i64));
        assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_setr_epi64() {
        let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_movepi64_pi64() {
        let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
        assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_movpi64_epi64() {
        let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_cvtpd_pi32() {
        let a = _mm_setr_pd(5., 0.);
        let r = _mm_cvtpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(5, 0));
    }

    #[simd_test = "sse2,mmx"]
    unsafe fn test_mm_cvttpd_pi32() {
        use std::{f64, i32};

        let a = _mm_setr_pd(5., 0.);
        let r = _mm_cvttpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(5, 0));

        // Out-of-range / NaN inputs produce the integer indefinite value
        // (i32::MIN) per the observed cvttpd2pi behavior.
        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
    }
}

View file

@ -1,240 +0,0 @@
//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
use coresimd::v128::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
// LLVM intrinsic declarations for the SSE4.1 `ptest` family used below.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse41.ptestz"]
    fn ptestz(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestc"]
    fn ptestc(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptest` sets ZF when (a AND mask) == 0; this returns that flag.
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Convenience alias for `_mm_testz_si128`.
    _mm_testz_si128(a, mask)
}
/// Tests whether the specified bits in `a` 128-bit integer vector are all
/// ones.
///
/// Argument:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// Returns:
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
// Both instructions are expected: `pcmpeqd` builds the all-ones mask,
// then `ptest` performs the carry test against it.
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    // `_mm_cmpeq_epi32(a, a)` is an idiom for an all-ones vector.
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    // Convenience alias for `_mm_testnzc_si128`.
    _mm_testnzc_si128(a, mask)
}
// Unit tests for the SSE4.1 `ptest` wrappers above; gated on runtime CPU
// support by the `simd_test` harness.
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_testz_si128() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testz_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_testc_si128() {
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_testc_si128(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_testnzc_si128() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_testnzc_si128(a, mask);
        assert_eq!(r, 0);
    }

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_test_all_zeros() {
        let a = _mm_set1_epi8(1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b011);
        let mask = _mm_set1_epi8(0b100);
        let r = _mm_test_all_zeros(a, mask);
        assert_eq!(r, 1);
    }

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_test_all_ones() {
        let a = _mm_set1_epi8(-1);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let r = _mm_test_all_ones(a);
        assert_eq!(r, 0);
    }

    #[simd_test = "sse4.1"]
    unsafe fn test_mm_test_mix_ones_zeros() {
        let a = _mm_set1_epi8(0);
        let mask = _mm_set1_epi8(1);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(-1);
        let mask = _mm_set1_epi8(0);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b110);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 1);
        let a = _mm_set1_epi8(0b101);
        let mask = _mm_set1_epi8(0b101);
        let r = _mm_test_mix_ones_zeros(a, mask);
        assert_eq!(r, 0);
    }
}

View file

@ -1,35 +0,0 @@
//! `i686`'s Streaming SIMD Extensions 4.2 (SSE4.2)
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::x86::*;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
/// return the results.
///
/// Each result lane is all ones when `a > b` holds for that lane and all
/// zeros otherwise.
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpgtq))]
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
    // `simd_gt` yields an i64x2 lane mask; transmute back to `__m128i`.
    mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
// Unit test for the SSE4.2 comparison wrapper above.
#[cfg(test)]
mod tests {
    use coresimd::x86::*;
    use stdsimd_test::simd_test;

    #[simd_test = "sse4.2"]
    unsafe fn test_mm_cmpgt_epi64() {
        let a = _mm_setr_epi64x(0, 0x2a);
        let b = _mm_set1_epi64x(0x00);
        let i = _mm_cmpgt_epi64(a, b);
        // Lane 0: 0 > 0 is false (zero); lane 1: 0x2a > 0 is true (all ones).
        assert_eq_m128i(
            i,
            _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64),
        );
    }
}

View file

@ -1,361 +0,0 @@
//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
#[cfg(test)]
use stdsimd_test::assert_instr;
use coresimd::x86::*;
/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    // Thin wrapper over the `llvm.x86.ssse3.pabs.b` intrinsic declared below.
    pabsb(a)
}
/// Compute the absolute value of packed 16-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    pabsw(a)
}
/// Compute the absolute value of packed 32-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    pabsd(a)
}
/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
/// the corresponding 8-bit element of `b`, and return the results
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    pshufb(a, b)
}
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
///
/// `n` must be a compile-time constant (enforced by
/// `rustc_args_required_const`); the `constify_imm8` macro expands it into
/// a constant immediate for the underlying `palignr` instruction.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        }
    }
    constify_imm8!(n, call)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    phaddw(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    phaddd(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    // Saturating variant of `_mm_hadd_pi16`.
    phaddsw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    phsubw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    phsubd(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    // Saturating variant of `_mm_hsub_pi16`.
    phsubsw(a, b)
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    // Note the asymmetry: `a` is treated as unsigned, `b` as signed.
    pmaddubsw(a, b)
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    pmulhrsw(a, b)
}
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    psignb(a, b)
}
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    psignw(a, b)
}
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    psignd(a, b)
}
// LLVM intrinsic declarations backing the SSSE3/MMX wrapper functions above.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pabs.b"]
    fn pabsb(a: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.pabs.w"]
    fn pabsw(a: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.pabs.d"]
    fn pabsd(a: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.pshuf.b"]
    fn pshufb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.palignr.b"]
    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;
    #[link_name = "llvm.x86.ssse3.phadd.w"]
    fn phaddw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.phadd.d"]
    fn phaddd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.phadd.sw"]
    fn phaddsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.phsub.w"]
    fn phsubw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.phsub.d"]
    fn phsubd(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.phsub.sw"]
    fn phsubsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
    fn pmaddubsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
    fn pmulhrsw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.psign.b"]
    fn psignb(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.psign.w"]
    fn psignw(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.ssse3.psign.d"]
    fn psignd(a: __m64, b: __m64) -> __m64;
}
// Unit tests for the `ssse3,mmx` wrappers above; gated on runtime CPU
// support by the `simd_test` harness.
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi8() {
        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
        assert_eq_m64(r, _mm_set1_pi8(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi16() {
        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
        assert_eq_m64(r, _mm_set1_pi16(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi32() {
        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
        assert_eq_m64(r, _mm_set1_pi32(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_shuffle_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        // Control bytes with the high bit set (128) zero the output lane;
        // otherwise the low 3 bits select the source byte.
        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
        let r = _mm_shuffle_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_alignr_pi8() {
        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
        let r = _mm_alignr_pi8(a, b, 4);
        assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadd_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(3, 7, 132, 7);
        let r = _mm_hadd_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadd_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(3, 132);
        let r = _mm_hadd_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadds_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        // 32767 + 1 and -32768 + -1 exercise both saturation directions.
        let b = _mm_setr_pi16(32767, 1, -32768, -1);
        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
        let r = _mm_hadds_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsub_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsub_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsub_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(-1, -124);
        let r = _mm_hsub_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsubs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_maddubs_pi16() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi16(130, 24, 192, 194);
        let r = _mm_maddubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_mulhrs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 32767, -1, -32768);
        let expected = _mm_setr_pi16(0, 2, 0, -4);
        let r = _mm_mulhrs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi16() {
        let a = _mm_setr_pi16(-1, 2, 3, 4);
        let b = _mm_setr_pi16(1, -1, 1, 0);
        let expected = _mm_setr_pi16(-1, -2, 3, 0);
        let r = _mm_sign_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(1, 0);
        let expected = _mm_setr_pi32(-1, 0);
        let r = _mm_sign_pi32(a, b);
        assert_eq_m64(r, expected);
    }
}

View file

@ -327,7 +327,7 @@ pub use self::test::*;
#[doc(hidden)]
#[allow(non_camel_case_types)]
trait m128iExt: Sized {
pub(crate) trait m128iExt: Sized {
fn as_m128i(self) -> __m128i;
#[inline]
@ -380,7 +380,7 @@ impl m128iExt for __m128i {
#[doc(hidden)]
#[allow(non_camel_case_types)]
trait m256iExt: Sized {
pub(crate) trait m256iExt: Sized {
fn as_m256i(self) -> __m256i;
#[inline]
@ -431,21 +431,69 @@ impl m256iExt for __m256i {
}
}
mod i386;
pub use self::i386::*;
// x86 w/o sse2
mod i586;
pub use self::i586::*;
mod eflags;
pub use self::eflags::*;
// `i686` is `i586 + sse2`.
//
// This module is not available for `i586` targets,
// but available for all `i686` targets by default
mod i686;
pub use self::i686::*;
#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
mod fxsr;
#[cfg(dont_compile_me)] // TODO: need to upstream `fxsr` target feature
pub use self::fxsr::*;
#[cfg(target_arch = "x86_64")]
mod x86_64;
#[cfg(target_arch = "x86_64")]
pub use self::x86_64::*;
mod bswap;
pub use self::bswap::*;
mod rdtsc;
pub use self::rdtsc::*;
mod cpuid;
pub use self::cpuid::*;
mod xsave;
pub use self::xsave::*;
mod sse;
pub use self::sse::*;
mod sse2;
pub use self::sse2::*;
mod sse3;
pub use self::sse3::*;
mod ssse3;
pub use self::ssse3::*;
mod sse41;
pub use self::sse41::*;
mod sse42;
pub use self::sse42::*;
mod avx;
pub use self::avx::*;
mod avx2;
pub use self::avx2::*;
mod abm;
pub use self::abm::*;
mod bmi;
pub use self::bmi::*;
mod bmi2;
pub use self::bmi2::*;
#[cfg(not(feature = "intel_sde"))]
mod sse4a;
#[cfg(not(feature = "intel_sde"))]
pub use self::sse4a::*;
#[cfg(not(feature = "intel_sde"))]
mod tbm;
#[cfg(not(feature = "intel_sde"))]
pub use self::tbm::*;
mod mmx;
pub use self::mmx::*;
mod pclmulqdq;
pub use self::pclmulqdq::*;
mod aes;
pub use self::aes::*;
mod rdrand;
pub use self::rdrand::*;

View file

@ -54,7 +54,7 @@ extern "C" {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::i386::rdtsc;
use coresimd::x86::rdtsc;
#[simd_test = "sse2"]
unsafe fn _rdtsc() {

View file

@ -1680,6 +1680,38 @@ extern "C" {
fn cmpss(a: __m128, b: __m128, imm8: i8) -> __m128;
#[link_name = "llvm.x86.mmx.movnt.dq"]
fn movntdq(a: *mut __m64, b: __m64);
#[link_name = "llvm.x86.sse.cvtpi2ps"]
fn cvtpi2ps(a: __m128, b: __m64) -> __m128;
#[link_name = "llvm.x86.mmx.maskmovq"]
fn maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8);
#[link_name = "llvm.x86.mmx.pextr.w"]
fn pextrw(a: __m64, imm8: i32) -> i32;
#[link_name = "llvm.x86.mmx.pinsr.w"]
fn pinsrw(a: __m64, d: i32, imm8: i32) -> __m64;
#[link_name = "llvm.x86.mmx.pmovmskb"]
fn pmovmskb(a: __m64) -> i32;
#[link_name = "llvm.x86.sse.pshuf.w"]
fn pshufw(a: __m64, imm8: i8) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxs.w"]
fn pmaxsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmaxu.b"]
fn pmaxub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmins.w"]
fn pminsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pminu.b"]
fn pminub(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmulhu.w"]
fn pmulhuw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.b"]
fn pavgb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pavg.w"]
fn pavgw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psad.bw"]
fn psadbw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtps2pi"]
fn cvtps2pi(a: __m128) -> __m64;
#[link_name = "llvm.x86.sse.cvttps2pi"]
fn cvttps2pi(a: __m128) -> __m64;
}
/// Stores `a` into the memory at `mem_addr` using a non-temporal memory hint.
@ -1702,6 +1734,432 @@ pub unsafe fn _mm_stream_pi(mem_addr: *mut __m64, a: __m64) {
movntdq(mem_addr, a)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _mm_max_pi16(a: __m64, b: __m64) -> __m64 {
    // Thin wrapper over the `llvm.x86.mmx.pmaxs.w` intrinsic.
    pmaxsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// greatest value into the result.
///
/// Alias of `_mm_max_pi16` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxsw))]
pub unsafe fn _m_pmaxsw(a: __m64, b: __m64) -> __m64 {
    _mm_max_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
///
/// (The comparison is unsigned: this wraps `llvm.x86.mmx.pmaxu.b`.)
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _mm_max_pu8(a: __m64, b: __m64) -> __m64 {
    pmaxub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// greatest value into the result.
///
/// Alias of `_mm_max_pu8` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmaxub))]
pub unsafe fn _m_pmaxub(a: __m64, b: __m64) -> __m64 {
    _mm_max_pu8(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _mm_min_pi16(a: __m64, b: __m64) -> __m64 {
    pminsw(a, b)
}
/// Compares the packed 16-bit signed integers of `a` and `b` writing the
/// smallest value into the result.
///
/// Alias of `_mm_min_pi16` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminsw))]
pub unsafe fn _m_pminsw(a: __m64, b: __m64) -> __m64 {
    _mm_min_pi16(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
///
/// (The comparison is unsigned: this wraps `llvm.x86.mmx.pminu.b`.)
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _mm_min_pu8(a: __m64, b: __m64) -> __m64 {
    pminub(a, b)
}
/// Compares the packed 8-bit unsigned integers of `a` and `b` writing the
/// smallest value into the result.
///
/// Alias of `_mm_min_pu8` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pminub))]
pub unsafe fn _m_pminub(a: __m64, b: __m64) -> __m64 {
    _mm_min_pu8(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _mm_mulhi_pu16(a: __m64, b: __m64) -> __m64 {
    pmulhuw(a, b)
}
/// Multiplies packed 16-bit unsigned integer values and writes the
/// high-order 16 bits of each 32-bit product to the corresponding bits in
/// the destination.
///
/// Alias of `_mm_mulhi_pu16` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmulhuw))]
pub unsafe fn _m_pmulhuw(a: __m64, b: __m64) -> __m64 {
    _mm_mulhi_pu16(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _mm_avg_pu8(a: __m64, b: __m64) -> __m64 {
    pavgb(a, b)
}
/// Computes the rounded averages of the packed unsigned 8-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
///
/// Alias of `_mm_avg_pu8` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgb))]
pub unsafe fn _m_pavgb(a: __m64, b: __m64) -> __m64 {
    _mm_avg_pu8(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _mm_avg_pu16(a: __m64, b: __m64) -> __m64 {
    pavgw(a, b)
}
/// Computes the rounded averages of the packed unsigned 16-bit integer
/// values and writes the averages to the corresponding bits in the
/// destination.
///
/// Alias of `_mm_avg_pu16` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pavgw))]
pub unsafe fn _m_pavgw(a: __m64, b: __m64) -> __m64 {
    _mm_avg_pu16(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value of each
/// difference. The sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _mm_sad_pu8(a: __m64, b: __m64) -> __m64 {
    psadbw(a, b)
}
/// Subtracts the corresponding 8-bit unsigned integer values of the two
/// 64-bit vector operands and computes the absolute value of each
/// difference. The sum of the 8 absolute differences is written to the
/// bits [15:0] of the destination; the remaining bits [63:16] are cleared.
///
/// Alias of `_mm_sad_pu8` using the classic MMX intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(psadbw))]
pub unsafe fn _m_psadbw(a: __m64, b: __m64) -> __m64 {
    _mm_sad_pu8(a, b)
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32_ps(a: __m128, b: __m64) -> __m128 {
    // Thin wrapper over the `llvm.x86.sse.cvtpi2ps` intrinsic.
    cvtpi2ps(a, b)
}
/// Converts two elements of a 64-bit vector of [2 x i32] into two
/// floating point values and writes them to the lower 64-bits of the
/// destination. The remaining higher order elements of the destination are
/// copied from the corresponding elements in the first operand.
///
/// Alias of `_mm_cvtpi32_ps` using the older intrinsic name.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvt_pi2ps(a: __m128, b: __m64) -> __m128 {
    _mm_cvtpi32_ps(a, b)
}
/// Converts the lower 4 signed 8-bit values of `a` into a 128-bit vector of
/// 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi8_ps(a: __m64) -> __m128 {
    let b = _mm_setzero_si64();
    // `b` becomes the sign mask of `a` (all-ones where a lane is negative),
    // so interleaving sign-extends each byte to 16 bits.
    let b = _mm_cmpgt_pi8(b, a);
    let b = _mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}
/// Converts the lower 4 unsigned 8-bit values of `a` into a 128-bit vector
/// of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu8_ps(a: __m64) -> __m128 {
    let b = _mm_setzero_si64();
    // Interleaving with zero zero-extends each byte to 16 bits (unsigned).
    let b = _mm_unpacklo_pi8(a, b);
    _mm_cvtpi16_ps(b)
}
/// Converts a 64-bit vector of `i16`s into a 128-bit vector of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi16_ps(a: __m64) -> __m128 {
    let b = _mm_setzero_si64();
    // `b` becomes the sign mask of `a`, so interleaving sign-extends each
    // 16-bit lane to 32 bits.
    let b = _mm_cmpgt_pi16(b, a);
    // Convert the high pair first, move it to the upper two lanes, then
    // convert the low pair into the lower two lanes.
    let c = _mm_unpackhi_pi16(a, b);
    let r = _mm_setzero_ps();
    let r = cvtpi2ps(r, c);
    let r = _mm_movelh_ps(r, r);
    let c = _mm_unpacklo_pi16(a, b);
    cvtpi2ps(r, c)
}
/// Converts a 64-bit vector of unsigned 16-bit values into a 128-bit vector
/// of 4 `f32`s.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpu16_ps(a: __m64) -> __m128 {
    let b = _mm_setzero_si64();
    // Interleaving with zero zero-extends each 16-bit lane to 32 bits
    // (unsigned), unlike the sign-extension in `_mm_cvtpi16_ps`.
    let c = _mm_unpackhi_pi16(a, b);
    let r = _mm_setzero_ps();
    let r = cvtpi2ps(r, c);
    let r = _mm_movelh_ps(r, r);
    let c = _mm_unpacklo_pi16(a, b);
    cvtpi2ps(r, c)
}
/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2ps))]
pub unsafe fn _mm_cvtpi32x2_ps(a: __m64, b: __m64) -> __m128 {
    // Result lane order is `a.0, a.1, b.0, b.1`: `b` is converted first and
    // duplicated into the upper lanes, then `a` overwrites the lower lanes.
    let c = _mm_setzero_ps();
    let c = _mm_cvtpi32_ps(c, b);
    let c = _mm_movelh_ps(c, c);
    _mm_cvtpi32_ps(c, a)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _mm_maskmove_si64(a: __m64, mask: __m64, mem_addr: *mut i8) {
    // `mem_addr` must be valid for an 8-byte write; only the bytes whose
    // corresponding mask byte has its MSB set are actually stored.
    maskmovq(a, mask, mem_addr)
}
/// Conditionally copies the values from each 8-bit element in the first
/// 64-bit integer vector operand to the specified memory location, as
/// specified by the most significant bit in the corresponding element in the
/// second 64-bit integer vector operand.
///
/// To minimize caching, the data is flagged as non-temporal
/// (unlikely to be used again soon).
///
/// Alias of [`_mm_maskmove_si64`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(maskmovq))]
pub unsafe fn _m_maskmovq(a: __m64, mask: __m64, mem_addr: *mut i8) {
    _mm_maskmove_si64(a, mask, mem_addr)
}
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_extract_pi16(a: __m64, imm2: i32) -> i32 {
    // `imm2` selects one of the four 16-bit lanes and must be a
    // compile-time constant (`rustc_args_required_const`); the macro lets
    // the intrinsic receive a literal immediate for each possible value.
    macro_rules! call {
        ($imm2:expr) => { pextrw(a, $imm2) as i32 }
    }
    constify_imm2!(imm2, call)
}
/// Extracts 16-bit element from a 64-bit vector of [4 x i16] and
/// returns it, as specified by the immediate integer operand.
///
/// Same operation as [`_mm_extract_pi16`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pextrw, imm2 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pextrw(a: __m64, imm2: i32) -> i32 {
    macro_rules! call {
        ($imm2:expr) => { pextrw(a, $imm2) as i32 }
    }
    constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `imm2`.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_insert_pi16(a: __m64, d: i32, imm2: i32) -> __m64 {
    // `imm2` is the destination lane index (a compile-time constant); only
    // the low 16 bits of `d` are inserted.
    macro_rules! call {
        ($imm2:expr) => { pinsrw(a, d, $imm2) }
    }
    constify_imm2!(imm2, call)
}
/// Copies data from the 64-bit vector of [4 x i16] to the destination,
/// and inserts the lower 16-bits of an integer operand at the 16-bit offset
/// specified by the immediate operand `imm2`.
///
/// Same operation as [`_mm_insert_pi16`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pinsrw, imm2 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _m_pinsrw(a: __m64, d: i32, imm2: i32) -> __m64 {
    macro_rules! call {
        ($imm2:expr) => { pinsrw(a, d, $imm2) }
    }
    constify_imm2!(imm2, call)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _mm_movemask_pi8(a: __m64) -> i32 {
    // Delegates to the `pmovmskb` intrinsic shim declared elsewhere in this
    // file.
    pmovmskb(a)
}
/// Takes the most significant bit from each 8-bit element in a 64-bit
/// integer vector to create a 16-bit mask value. Zero-extends the value to
/// 32-bit integer and writes it to the destination.
///
/// Alias of [`_mm_movemask_pi8`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pmovmskb))]
pub unsafe fn _m_pmovmskb(a: __m64) -> i32 {
    _mm_movemask_pi8(a)
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_shuffle_pi16(a: __m64, imm8: i32) -> __m64 {
    // `imm8` encodes four 2-bit source-lane indices and must be a
    // compile-time constant (`rustc_args_required_const`).
    macro_rules! call {
        ($imm8:expr) => { pshufw(a, $imm8) }
    }
    constify_imm8!(imm8, call)
}
/// Shuffles the 4 16-bit integers from a 64-bit integer vector to the
/// destination, as specified by the immediate value operand.
///
/// Same operation as [`_mm_shuffle_pi16`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(pshufw, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _m_pshufw(a: __m64, imm8: i32) -> __m64 {
    macro_rules! call {
        ($imm8:expr) => { pshufw(a, $imm8) }
    }
    constify_imm8!(imm8, call)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvttps_pi32(a: __m128) -> __m64 {
    // Delegates to the `cvttps2pi` intrinsic shim declared elsewhere in
    // this file.
    cvttps2pi(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers with truncation.
///
/// Alias of [`_mm_cvttps_pi32`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvttps2pi))]
pub unsafe fn _mm_cvtt_ps2pi(a: __m128) -> __m64 {
    _mm_cvttps_pi32(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi32(a: __m128) -> __m64 {
    // Delegates to the `cvtps2pi` intrinsic shim declared elsewhere in this
    // file (rounding variant; see `_mm_cvttps_pi32` for truncation).
    cvtps2pi(a)
}
/// Convert the two lower packed single-precision (32-bit) floating-point
/// elements in `a` to packed 32-bit integers.
///
/// Alias of [`_mm_cvtps_pi32`].
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvt_ps2pi(a: __m128) -> __m64 {
    _mm_cvtps_pi32(a)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 16-bit integers.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi16(a: __m128) -> __m64 {
    // Convert the low two lanes, then the high two (moved down via
    // `movehl`), and pack the four i32 results into four i16 lanes.
    let b = _mm_cvtps_pi32(a);
    let a = _mm_movehl_ps(a, a);
    let c = _mm_cvtps_pi32(a);
    _mm_packs_pi32(b, c)
}
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 8-bit integers, and returns them in the lower 4 elements of the
/// result.
#[inline]
#[target_feature(enable = "sse,mmx")]
#[cfg_attr(test, assert_instr(cvtps2pi))]
pub unsafe fn _mm_cvtps_pi8(a: __m128) -> __m64 {
    // Go through i16 first, then pack against zeros so the upper 4 result
    // bytes are 0.
    let b = _mm_cvtps_pi16(a);
    let c = _mm_setzero_si64();
    _mm_packs_pi16(b, c)
}
#[cfg(test)]
mod tests {
use std::mem::transmute;
@ -3121,4 +3579,240 @@ mod tests {
_mm_stream_pi(&mut *mem as *mut _ as *mut _, a);
assert_eq_m64(a, *mem);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_max_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pi16(a, b));
assert_eq_m64(r, _m_pmaxsw(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_max_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(5, 6, 7, 8, 5, 6, 7, 8);
assert_eq_m64(r, _mm_max_pu8(a, b));
assert_eq_m64(r, _m_pmaxub(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_min_pi16() {
let a = _mm_setr_pi16(-1, 6, -3, 8);
let b = _mm_setr_pi16(5, -2, 7, -4);
let r = _mm_setr_pi16(-1, -2, -3, -4);
assert_eq_m64(r, _mm_min_pi16(a, b));
assert_eq_m64(r, _m_pminsw(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_min_pu8() {
let a = _mm_setr_pi8(2, 6, 3, 8, 2, 6, 3, 8);
let b = _mm_setr_pi8(5, 2, 7, 4, 5, 2, 7, 4);
let r = _mm_setr_pi8(2, 2, 3, 4, 2, 2, 3, 4);
assert_eq_m64(r, _mm_min_pu8(a, b));
assert_eq_m64(r, _m_pminub(a, b));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_mulhi_pu16() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _mm_mulhi_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test = "sse,mmx"]
unsafe fn test_m_pmulhuw() {
let (a, b) = (_mm_set1_pi16(1000), _mm_set1_pi16(1001));
let r = _m_pmulhuw(a, b);
assert_eq_m64(r, _mm_set1_pi16(15));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_avg_pu8() {
let (a, b) = (_mm_set1_pi8(3), _mm_set1_pi8(9));
let r = _mm_avg_pu8(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
let r = _m_pavgb(a, b);
assert_eq_m64(r, _mm_set1_pi8(6));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_avg_pu16() {
let (a, b) = (_mm_set1_pi16(3), _mm_set1_pi16(9));
let r = _mm_avg_pu16(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
let r = _m_pavgw(a, b);
assert_eq_m64(r, _mm_set1_pi16(6));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_sad_pu8() {
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = _mm_setr_pi8(
255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
1, 2, 3, 4,
);
let b = _mm_setr_pi8(0, 0, 0, 0, 2, 1, 2, 1);
let r = _mm_sad_pu8(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
let r = _m_psadbw(a, b);
assert_eq_m64(r, _mm_setr_pi16(1020, 0, 0, 0));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi32_ps() {
let a = _mm_setr_ps(0., 0., 3., 4.);
let b = _mm_setr_pi32(1, 2);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32_ps(a, b);
assert_eq_m128(r, expected);
let r = _mm_cvt_pi2ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpu16_ps() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu16_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpu8_ps() {
let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpu8_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtpi32x2_ps() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = _mm_setr_ps(1., 2., 3., 4.);
let r = _mm_cvtpi32x2_ps(a, b);
assert_eq_m128(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_maskmove_si64() {
let a = _mm_set1_pi8(9);
let mask = _mm_setr_pi8(0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0);
let mut r = _mm_set1_pi8(0);
_mm_maskmove_si64(a, mask, &mut r as *mut _ as *mut i8);
let e = _mm_setr_pi8(0, 0, 9, 0, 0, 0, 0, 0);
assert_eq_m64(r, e);
let mut r = _mm_set1_pi8(0);
_m_maskmovq(a, mask, &mut r as *mut _ as *mut i8);
assert_eq_m64(r, e);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_extract_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_extract_pi16(a, 0);
assert_eq!(r, 1);
let r = _mm_extract_pi16(a, 1);
assert_eq!(r, 2);
let r = _m_pextrw(a, 1);
assert_eq!(r, 2);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_insert_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_insert_pi16(a, 0, 0b0);
let expected = _mm_setr_pi16(0, 2, 3, 4);
assert_eq_m64(r, expected);
let r = _mm_insert_pi16(a, 0, 0b10);
let expected = _mm_setr_pi16(1, 2, 0, 4);
assert_eq_m64(r, expected);
let r = _m_pinsrw(a, 0, 0b10);
assert_eq_m64(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_movemask_pi8() {
let a =
_mm_setr_pi16(0b1000_0000, 0b0100_0000, 0b1000_0000, 0b0100_0000);
let r = _mm_movemask_pi8(a);
assert_eq!(r, 0b10001);
let r = _m_pmovmskb(a);
assert_eq!(r, 0b10001);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_shuffle_pi16() {
let a = _mm_setr_pi16(1, 2, 3, 4);
let r = _mm_shuffle_pi16(a, 0b00_01_01_11);
let expected = _mm_setr_pi16(4, 2, 2, 1);
assert_eq_m64(r, expected);
let r = _m_pshufw(a, 0b00_01_01_11);
assert_eq_m64(r, expected);
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi32() {
let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(1, 2);
assert_eq_m64(r, _mm_cvtps_pi32(a));
assert_eq_m64(r, _mm_cvt_ps2pi(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvttps_pi32() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi32(7, 2);
assert_eq_m64(r, _mm_cvttps_pi32(a));
assert_eq_m64(r, _mm_cvtt_ps2pi(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi16() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi16(7, 2, 3, 4);
assert_eq_m64(r, _mm_cvtps_pi16(a));
}
#[simd_test = "sse,mmx"]
unsafe fn test_mm_cvtps_pi8() {
let a = _mm_setr_ps(7.0, 2.0, 3.0, 4.0);
let r = _mm_setr_pi8(7, 2, 3, 4, 0, 0, 0, 0);
assert_eq_m64(r, _mm_cvtps_pi8(a));
}
}

View file

@ -2213,6 +2213,113 @@ pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
simd_shuffle2(a, b, [0, 2])
}
/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.mmx.padd.q` shim (see the `extern` block
    // below).
    paddq(a, b)
}
/// Multiplies 32-bit unsigned integer values contained in the lower bits
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
/// product.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
    // `pmuludq2` is the MMX `llvm.x86.mmx.pmulu.dq` shim (see the `extern`
    // block below); the "2" suffix avoids clashing with the SSE2 shim.
    pmuludq2(a, b)
}
/// Subtracts signed or unsigned 64-bit integer values and writes the
/// difference to the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
    // Computes `a - b` via the `llvm.x86.mmx.psub.q` shim (see the `extern`
    // block below).
    psubq(a, b)
}
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
/// [2 x i32] into two double-precision floating-point values, returned in a
/// 128-bit vector of [2 x double].
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
    // Delegates to the `llvm.x86.sse.cvtpi2pd` shim (see the `extern` block
    // below).
    cvtpi2pd(a)
}
/// Initializes both 64-bit values in a 128-bit vector of [2 x i64] with
/// the specified 64-bit integer values.
///
/// `e1` becomes the upper 64 bits of the result and `e0` the lower 64 bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i {
    _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0))
}
/// Initializes both values in a 128-bit vector of [2 x i64] with the
/// specified 64-bit value.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i {
    // Broadcast: both 64-bit halves of the result hold `a`.
    _mm_set_epi64x(mem::transmute(a), mem::transmute(a))
}
/// Constructs a 128-bit integer vector, initialized in reverse order
/// with the specified 64-bit integral values.
///
/// Unlike `_mm_set_epi64`, `e1` is the *lower* 64 bits here and `e0` the
/// upper — note the swapped argument order in the forwarded call.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
    _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
}
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
/// integer.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
    // Extracts lane 0 of the i64x2 view and reinterprets it as `__m64`.
    mem::transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
}
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
/// upper bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
    // High half 0, low half `a`.
    _mm_set_epi64x(0, mem::transmute(a))
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
    // Delegates to the `llvm.x86.sse.cvtpd2pi` shim (see the `extern` block
    // below); see `_mm_cvttpd_pi32` for the truncating variant.
    cvtpd2pi(a)
}
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of [2 x double] into two signed 32-bit integer values,
/// returned in a 64-bit vector of [2 x i32].
/// If the result of either conversion is inexact, the result is truncated
/// (rounded towards zero) regardless of the current MXCSR setting.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
    // Delegates to the `llvm.x86.sse.cvttpd2pi` shim (see the `extern`
    // block below).
    cvttpd2pi(a)
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse2.pause"]
@ -2371,11 +2478,23 @@ extern "C" {
fn storeudq(mem_addr: *mut i8, a: __m128i);
#[link_name = "llvm.x86.sse2.storeu.pd"]
fn storeupd(mem_addr: *mut i8, a: __m128d);
#[link_name = "llvm.x86.mmx.padd.q"]
fn paddq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.pmulu.dq"]
fn pmuludq2(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.psub.q"]
fn psubq(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.sse.cvtpi2pd"]
fn cvtpi2pd(a: __m64) -> __m128d;
#[link_name = "llvm.x86.sse.cvtpd2pi"]
fn cvtpd2pi(a: __m128d) -> __m64;
#[link_name = "llvm.x86.sse.cvttpd2pi"]
fn cvttpd2pi(a: __m128d) -> __m64;
}
#[cfg(test)]
mod tests {
use std::mem::transmute;
use std::mem::{self, transmute};
use std::f64::{self, NAN};
use std::f32;
use std::i32;
@ -4452,4 +4571,89 @@ mod tests {
let r = _mm_castsi128_ps(a);
assert_eq_m128(r, expected);
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_add_si64() {
let a = 1i64;
let b = 2i64;
let expected = 3i64;
let r = _mm_add_si64(mem::transmute(a), mem::transmute(b));
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_mul_su32() {
let a = _mm_setr_pi32(1, 2);
let b = _mm_setr_pi32(3, 4);
let expected = 3u64;
let r = _mm_mul_su32(a, b);
assert_eq_m64(r, mem::transmute(expected));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_sub_si64() {
let a = 1i64;
let b = 2i64;
let expected = -1i64;
let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b));
assert_eq!(mem::transmute::<__m64, i64>(r), expected);
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_cvtpi32_pd() {
let a = _mm_setr_pi32(1, 2);
let expected = _mm_setr_pd(1., 2.);
let r = _mm_cvtpi32_pd(a);
assert_eq_m128d(r, expected);
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_set_epi64() {
let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_set1_epi64() {
let r = _mm_set1_epi64(mem::transmute(1i64));
assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_setr_epi64() {
let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_movepi64_pi64() {
let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_movpi64_epi64() {
let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_cvtpd_pi32() {
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvtpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(5, 0));
}
#[simd_test = "sse2,mmx"]
unsafe fn test_mm_cvttpd_pi32() {
use std::{f64, i32};
let a = _mm_setr_pd(5., 0.);
let r = _mm_cvttpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(5, 0));
let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
let r = _mm_cvttpd_pi32(a);
assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
}
}

View file

@ -797,6 +797,125 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
mem::transmute(constify_imm3!(imm8, call))
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptestz` is the `llvm.x86.sse41.ptestz` shim (see the `extern` block
    // below); both operands are reinterpreted as i64x2.
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptestc` is the `llvm.x86.sse41.ptestc` shim (see the `extern` block
    // below).
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    // `ptestnzc` is the `llvm.x86.sse41.ptestnzc` shim (see the `extern`
    // block below).
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// Alias of [`_mm_testz_si128`].
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testz_si128(a, mask)
}
/// Tests whether the specified bits in `a` 128-bit integer vector are all
/// ones.
///
/// Argument:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// Returns:
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    // `_mm_cmpeq_epi32(a, a)` produces an all-ones mask, so this checks
    // every bit of `a` via the carry form of `ptest`.
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
/// operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// Alias of [`_mm_testnzc_si128`].
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    _mm_testnzc_si128(a, mask)
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.sse41.pblendvb"]
@ -849,6 +968,12 @@ extern "C" {
fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
#[link_name = "llvm.x86.sse41.mpsadbw"]
fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
#[link_name = "llvm.x86.sse41.ptestz"]
fn ptestz(a: i64x2, mask: i64x2) -> i32;
#[link_name = "llvm.x86.sse41.ptestc"]
fn ptestc(a: i64x2, mask: i64x2) -> i32;
#[link_name = "llvm.x86.sse41.ptestnzc"]
fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
#[cfg(test)]
@ -1476,4 +1601,102 @@ mod tests {
let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
assert_eq_m128i(r, e);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_testz_si128() {
let a = _mm_set1_epi8(1);
let mask = _mm_set1_epi8(0);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(0b011);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testz_si128(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_testc_si128() {
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b100);
let r = _mm_testc_si128(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_testnzc_si128() {
let a = _mm_set1_epi8(0);
let mask = _mm_set1_epi8(1);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b101);
let r = _mm_testnzc_si128(a, mask);
assert_eq!(r, 0);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_test_all_zeros() {
let a = _mm_set1_epi8(1);
let mask = _mm_set1_epi8(0);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(0b011);
let mask = _mm_set1_epi8(0b100);
let r = _mm_test_all_zeros(a, mask);
assert_eq!(r, 1);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_test_all_ones() {
let a = _mm_set1_epi8(-1);
let r = _mm_test_all_ones(a);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let r = _mm_test_all_ones(a);
assert_eq!(r, 0);
}
#[simd_test = "sse4.1"]
unsafe fn test_mm_test_mix_ones_zeros() {
let a = _mm_set1_epi8(0);
let mask = _mm_set1_epi8(1);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(-1);
let mask = _mm_set1_epi8(0);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b110);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 1);
let a = _mm_set1_epi8(0b101);
let mask = _mm_set1_epi8(0b101);
let r = _mm_test_mix_ones_zeros(a, mask);
assert_eq!(r, 0);
}
}

View file

@ -5,6 +5,7 @@
#[cfg(test)]
use stdsimd_test::assert_instr;
use coresimd::simd_llvm::*;
use coresimd::v128::*;
use coresimd::x86::*;
@ -601,6 +602,15 @@ pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
crc32_32_32(crc, v)
}
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
/// return the results.
///
/// Each result lane is all-ones when `a > b` (signed) and all-zeros
/// otherwise.
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpgtq))]
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
#[allow(improper_ctypes)]
extern "C" {
// SSE 4.2 string and text comparison ops
@ -826,4 +836,15 @@ mod tests {
let i = _mm_crc32_u32(crc, v);
assert_eq!(i, 0xffae2ed1);
}
#[simd_test = "sse4.2"]
unsafe fn test_mm_cmpgt_epi64() {
let a = _mm_setr_epi64x(0, 0x2a);
let b = _mm_set1_epi64x(0x00);
let i = _mm_cmpgt_epi64(a, b);
assert_eq_m128i(
i,
_mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64),
);
}
}

View file

@ -239,6 +239,169 @@ pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}
/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.pabs.b` shim (see the `extern` block
    // below).
    pabsb(a)
}
/// Compute the absolute value of packed 16-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.pabs.w` shim (see the `extern` block
    // below).
    pabsw(a)
}
/// Compute the absolute value of packed 32-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.pabs.d` shim (see the `extern` block
    // below).
    pabsd(a)
}
/// Shuffle packed 8-bit integers in `a` according to shuffle control mask in
/// the corresponding 8-bit element of `b`, and return the results
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.pshuf.b` shim (see the `extern`
    // block below).
    pshufb(a, b)
}
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    // `n` is a byte shift count and must be a compile-time constant
    // (`rustc_args_required_const`); constify_imm8! forwards it as a
    // literal immediate to the `llvm.x86.mmx.palignr.b` shim.
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        }
    }
    constify_imm8!(n, call)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.phadd.w` shim (see the `extern`
    // block below).
    phaddw(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.phadd.d` shim (see the `extern`
    // block below).
    phaddd(a, b)
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    // Saturating variant of `_mm_hadd_pi16`; delegates to the
    // `llvm.x86.ssse3.phadd.sw` shim (see the `extern` block below).
    phaddsw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.phsub.w` shim (see the `extern`
    // block below).
    phsubw(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `llvm.x86.ssse3.phsub.d` shim (see the `extern`
    // block below).
    phsubd(a, b)
}
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    // Saturating variant of `_mm_hsub_pi16`; delegates to the
    // `llvm.x86.ssse3.phsub.sw` shim (see the `extern` block below).
    phsubsw(a, b)
}
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `pmaddubsw` intrinsic shim declared elsewhere in
    // this file.
    pmaddubsw(a, b)
}
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `pmulhrsw` intrinsic shim declared elsewhere in this
    // file.
    pmulhrsw(a, b)
}
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Element in result are zeroed out when the corresponding element in `b` is
/// zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    // Delegates to the `psignb` intrinsic shim declared elsewhere in this
    // file.
    psignb(a, b)
}
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    // Thin wrapper: delegates to the `llvm.x86.ssse3.psign.w` intrinsic
    // (`psignw` instruction).
    psignw(a, b)
}
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    // Thin wrapper: delegates to the `llvm.x86.ssse3.psign.d` intrinsic
    // (`psignd` instruction).
    psignd(a, b)
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
@ -285,6 +448,54 @@ extern "C" {
#[link_name = "llvm.x86.ssse3.psign.d.128"]
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.ssse3.pabs.b"]
fn pabsb(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pabs.w"]
fn pabsw(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pabs.d"]
fn pabsd(a: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pshuf.b"]
fn pshufb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.mmx.palignr.b"]
fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.w"]
fn phaddw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.d"]
fn phaddd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phadd.sw"]
fn phaddsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.w"]
fn phsubw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.d"]
fn phsubd(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.phsub.sw"]
fn phsubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
fn pmaddubsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
fn pmulhrsw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.b"]
fn psignb(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.w"]
fn psignw(a: __m64, b: __m64) -> __m64;
#[link_name = "llvm.x86.ssse3.psign.d"]
fn psignd(a: __m64, b: __m64) -> __m64;
}
#[cfg(test)]
@ -491,4 +702,138 @@ mod tests {
let r = _mm_sign_epi32(a, b);
assert_eq_m128i(r, expected);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_abs_pi8() {
    // |-5| == 5 in every 8-bit lane.
    let result = _mm_abs_pi8(_mm_set1_pi8(-5));
    let expected = _mm_set1_pi8(5);
    assert_eq_m64(result, expected);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_abs_pi16() {
    // |-5| == 5 in every 16-bit lane.
    let result = _mm_abs_pi16(_mm_set1_pi16(-5));
    let expected = _mm_set1_pi16(5);
    assert_eq_m64(result, expected);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_abs_pi32() {
    // |-5| == 5 in every 32-bit lane.
    let result = _mm_abs_pi32(_mm_set1_pi32(-5));
    let expected = _mm_set1_pi32(5);
    assert_eq_m64(result, expected);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_shuffle_pi8() {
    // The 128 control byte has its high bit set, which zeroes that lane
    // (see the expected 0); other control bytes select source lanes mod 8.
    let src = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
    let ctrl = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
    let want = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
    assert_eq_m64(_mm_shuffle_pi8(src, ctrl), want);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_alignr_pi8() {
let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
let r = _mm_alignr_pi8(a, b, 4);
assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hadd_pi16() {
    // Pairwise sums: (1+2, 3+4) from `a`, then (4+128, 4+3) from `b`.
    let a = _mm_setr_pi16(1, 2, 3, 4);
    let b = _mm_setr_pi16(4, 128, 4, 3);
    assert_eq_m64(_mm_hadd_pi16(a, b), _mm_setr_pi16(3, 7, 132, 7));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hadd_pi32() {
    // Pairwise sums: 1+2 from `a`, then 4+128 from `b`.
    let a = _mm_setr_pi32(1, 2);
    let b = _mm_setr_pi32(4, 128);
    assert_eq_m64(_mm_hadd_pi32(a, b), _mm_setr_pi32(3, 132));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hadds_pi16() {
    // Saturating pairwise sums: 32767+1 clamps to i16::MAX and
    // -32768 + -1 clamps to i16::MIN.
    let a = _mm_setr_pi16(1, 2, 3, 4);
    let b = _mm_setr_pi16(32767, 1, -32768, -1);
    assert_eq_m64(_mm_hadds_pi16(a, b), _mm_setr_pi16(3, 7, 32767, -32768));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hsub_pi16() {
    // Pairwise differences: (1-2, 3-4) from `a`, then (4-128, 4-3) from `b`.
    let a = _mm_setr_pi16(1, 2, 3, 4);
    let b = _mm_setr_pi16(4, 128, 4, 3);
    assert_eq_m64(_mm_hsub_pi16(a, b), _mm_setr_pi16(-1, -1, -124, 1));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hsub_pi32() {
    // Pairwise differences: 1-2 from `a`, then 4-128 from `b`.
    let a = _mm_setr_pi32(1, 2);
    let b = _mm_setr_pi32(4, 128);
    assert_eq_m64(_mm_hsub_pi32(a, b), _mm_setr_pi32(-1, -124));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_hsubs_pi16() {
    // Same inputs as the non-saturating case — none of these differences
    // actually reach the saturation bounds, so results match _mm_hsub_pi16.
    let a = _mm_setr_pi16(1, 2, 3, 4);
    let b = _mm_setr_pi16(4, 128, 4, 3);
    assert_eq_m64(_mm_hsubs_pi16(a, b), _mm_setr_pi16(-1, -1, -124, 1));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_maddubs_pi16() {
    // Unsigned bytes from `a` times signed bytes from `b`, summed in
    // adjacent pairs: 1*4 + 2*63 = 130, 3*4 + 4*3 = 24, 5*24 + 6*12 = 192,
    // 7*6 + 8*19 = 194.
    let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
    let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
    assert_eq_m64(_mm_maddubs_pi16(a, b), _mm_setr_pi16(130, 24, 192, 194));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_mulhrs_pi16() {
    // Rounded high part of the products, i.e. ((a*b >> 14) + 1) >> 1 per
    // lane: small products round to 0; 2*32767 -> 2; 4*-32768 -> -4.
    let a = _mm_setr_pi16(1, 2, 3, 4);
    let b = _mm_setr_pi16(4, 32767, -1, -32768);
    assert_eq_m64(_mm_mulhrs_pi16(a, b), _mm_setr_pi16(0, 2, 0, -4));
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_sign_pi8() {
    // Per lane: b > 0 passes `a` through, b == 0 zeroes it, b < 0 negates.
    let values = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
    let signs = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
    let want = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
    assert_eq_m64(_mm_sign_pi8(values, signs), want);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_sign_pi16() {
    // Per lane: b > 0 passes `a` through, b == 0 zeroes it, b < 0 negates.
    let values = _mm_setr_pi16(-1, 2, 3, 4);
    let signs = _mm_setr_pi16(1, -1, 1, 0);
    let want = _mm_setr_pi16(-1, -2, 3, 0);
    assert_eq_m64(_mm_sign_pi16(values, signs), want);
}
#[simd_test = "ssse3,mmx"]
unsafe fn test_mm_sign_pi32() {
    // Per lane: b > 0 passes `a` through, b == 0 zeroes it.
    let values = _mm_setr_pi32(-1, 2);
    let signs = _mm_setr_pi32(1, 0);
    let want = _mm_setr_pi32(-1, 0);
    assert_eq_m64(_mm_sign_pi32(values, signs), want);
}
}

View file

@ -263,41 +263,41 @@ pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::i586::tbm;
use coresimd::x86::*;
/*
#[simd_test = "tbm"]
unsafe fn _bextr_u32() {
assert_eq!(tbm::_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
unsafe fn test_bextr_u32() {
assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);
}
#[simd_test = "tbm"]
unsafe fn _bextr_u64() {
assert_eq!(tbm::_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
unsafe fn test_bextr_u64() {
assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);
}
*/
#[simd_test = "tbm"]
unsafe fn _blcfill_u32() {
assert_eq!(tbm::_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
assert_eq!(tbm::_blcfill_u32(0b1111_1111u32), 0u32);
unsafe fn test_blcfill_u32() {
assert_eq!(_blcfill_u32(0b0101_0111u32), 0b0101_0000u32);
assert_eq!(_blcfill_u32(0b1111_1111u32), 0u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcfill_u64() {
assert_eq!(tbm::_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
assert_eq!(tbm::_blcfill_u64(0b1111_1111u64), 0u64);
unsafe fn test_blcfill_u64() {
assert_eq!(_blcfill_u64(0b0101_0111u64), 0b0101_0000u64);
assert_eq!(_blcfill_u64(0b1111_1111u64), 0u64);
}
#[simd_test = "tbm"]
unsafe fn _blci_u32() {
unsafe fn test_blci_u32() {
assert_eq!(
tbm::_blci_u32(0b0101_0000u32),
_blci_u32(0b0101_0000u32),
0b1111_1111_1111_1111_1111_1111_1111_1110u32
);
assert_eq!(
tbm::_blci_u32(0b1111_1111u32),
_blci_u32(0b1111_1111u32),
0b1111_1111_1111_1111_1111_1110_1111_1111u32
);
}
@ -305,61 +305,61 @@ mod tests {
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blci_u64() {
unsafe fn test_blci_u64() {
assert_eq!(
tbm::_blci_u64(0b0101_0000u64),
_blci_u64(0b0101_0000u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110u64
);
assert_eq!(
tbm::_blci_u64(0b1111_1111u64),
_blci_u64(0b1111_1111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1110_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _blcic_u32() {
assert_eq!(tbm::_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
assert_eq!(tbm::_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
unsafe fn test_blcic_u32() {
assert_eq!(_blcic_u32(0b0101_0001u32), 0b0000_0010u32);
assert_eq!(_blcic_u32(0b1111_1111u32), 0b1_0000_0000u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcic_u64() {
assert_eq!(tbm::_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
assert_eq!(tbm::_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
unsafe fn test_blcic_u64() {
assert_eq!(_blcic_u64(0b0101_0001u64), 0b0000_0010u64);
assert_eq!(_blcic_u64(0b1111_1111u64), 0b1_0000_0000u64);
}
#[simd_test = "tbm"]
unsafe fn _blcmsk_u32() {
assert_eq!(tbm::_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
assert_eq!(tbm::_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
unsafe fn test_blcmsk_u32() {
assert_eq!(_blcmsk_u32(0b0101_0001u32), 0b0000_0011u32);
assert_eq!(_blcmsk_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcmsk_u64() {
assert_eq!(tbm::_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
assert_eq!(tbm::_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
unsafe fn test_blcmsk_u64() {
assert_eq!(_blcmsk_u64(0b0101_0001u64), 0b0000_0011u64);
assert_eq!(_blcmsk_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test = "tbm"]
unsafe fn _blcs_u32() {
assert_eq!(tbm::_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(tbm::_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
unsafe fn test_blcs_u32() {
assert_eq!(_blcs_u32(0b0101_0001u32), 0b0101_0011u32);
assert_eq!(_blcs_u32(0b1111_1111u32), 0b1_1111_1111u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _blcs_u64() {
assert_eq!(tbm::_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(tbm::_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
unsafe fn test_blcs_u64() {
assert_eq!(_blcs_u64(0b0101_0001u64), 0b0101_0011u64);
assert_eq!(_blcs_u64(0b1111_1111u64), 0b1_1111_1111u64);
}
#[simd_test = "tbm"]
unsafe fn _blsfill_u32() {
assert_eq!(tbm::_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
unsafe fn test_blsfill_u32() {
assert_eq!(_blsfill_u32(0b0101_0100u32), 0b0101_0111u32);
assert_eq!(
tbm::_blsfill_u32(0u32),
_blsfill_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
@ -367,22 +367,22 @@ mod tests {
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blsfill_u64() {
assert_eq!(tbm::_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
unsafe fn test_blsfill_u64() {
assert_eq!(_blsfill_u64(0b0101_0100u64), 0b0101_0111u64);
assert_eq!(
tbm::_blsfill_u64(0u64),
_blsfill_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _blsic_u32() {
unsafe fn test_blsic_u32() {
assert_eq!(
tbm::_blsic_u32(0b0101_0100u32),
_blsic_u32(0b0101_0100u32),
0b1111_1111_1111_1111_1111_1111_1111_1011u32
);
assert_eq!(
tbm::_blsic_u32(0u32),
_blsic_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
@ -390,25 +390,25 @@ mod tests {
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _blsic_u64() {
unsafe fn test_blsic_u64() {
assert_eq!(
tbm::_blsic_u64(0b0101_0100u64),
_blsic_u64(0b0101_0100u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1011u64
);
assert_eq!(
tbm::_blsic_u64(0u64),
_blsic_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _t1mskc_u32() {
unsafe fn test_t1mskc_u32() {
assert_eq!(
tbm::_t1mskc_u32(0b0101_0111u32),
_t1mskc_u32(0b0101_0111u32),
0b1111_1111_1111_1111_1111_1111_1111_1000u32
);
assert_eq!(
tbm::_t1mskc_u32(0u32),
_t1mskc_u32(0u32),
0b1111_1111_1111_1111_1111_1111_1111_1111u32
);
}
@ -416,27 +416,27 @@ mod tests {
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
#[cfg_attr(rustfmt, rustfmt_skip)]
unsafe fn _t1mksc_u64() {
unsafe fn test_t1mksc_u64() {
assert_eq!(
tbm::_t1mskc_u64(0b0101_0111u64),
_t1mskc_u64(0b0101_0111u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1000u64
);
assert_eq!(
tbm::_t1mskc_u64(0u64),
_t1mskc_u64(0u64),
0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111u64
);
}
#[simd_test = "tbm"]
unsafe fn _tzmsk_u32() {
assert_eq!(tbm::_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
assert_eq!(tbm::_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
unsafe fn test_tzmsk_u32() {
assert_eq!(_tzmsk_u32(0b0101_1000u32), 0b0000_0111u32);
assert_eq!(_tzmsk_u32(0b0101_1001u32), 0b0000_0000u32);
}
#[simd_test = "tbm"]
#[cfg(not(target_arch = "x86"))]
unsafe fn _tzmsk_u64() {
assert_eq!(tbm::_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
assert_eq!(tbm::_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
unsafe fn test_tzmsk_u64() {
assert_eq!(_tzmsk_u64(0b0101_1000u64), 0b0000_0111u64);
assert_eq!(_tzmsk_u64(0b0101_1001u64), 0b0000_0000u64);
}
}

View file

@ -103,7 +103,7 @@ pub unsafe fn get_m256(a: __m256, idx: usize) -> f32 {
A { a }.b[idx]
}
// These intrinsics doesn't exist on x86 b/c it requires a 64-bit registe,r
// These intrinsics doesn't exist on x86 b/c it requires a 64-bit register,
// which doesn't exist on x86!
#[cfg(target_arch = "x86")]
mod x86_polyfill {
@ -132,5 +132,8 @@ mod x86_polyfill {
a.a
}
}
#[cfg(target_arch = "x86")]
#[cfg(target_arch = "x86_64")]
mod x86_polyfill {
pub use coresimd::x86_64::{_mm_insert_epi64, _mm256_insert_epi64};
}
pub use self::x86_polyfill::*;

View file

@ -54,7 +54,7 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
/// `XFEATURE_ENABLED_MASK` for `XCR`
///
/// This intrinsic maps to `XSETBV` instruction.
const _XCR_XFEATURE_ENABLED_MASK: u32 = 0;
pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0;
/// Copy 64-bits from `val` to the extended control register (`XCR`) specified
/// by `a`.
@ -141,7 +141,7 @@ mod tests {
use std::fmt;
use std::prelude::v1::*;
use coresimd::x86::i586::xsave;
use coresimd::x86::*;
use stdsimd_test::simd_test;
#[repr(align(64))]
@ -194,23 +194,23 @@ mod tests {
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
xsave::_xsave(a.ptr(), m);
xsave::_xrstor(a.ptr(), m);
xsave::_xsave(b.ptr(), m);
_xsave(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsave(b.ptr(), m);
assert_eq!(a, b);
}
*/
#[simd_test = "xsave"]
unsafe fn xgetbv_xsetbv() {
let xcr_n: u32 = xsave::_XCR_XFEATURE_ENABLED_MASK;
let xcr_n: u32 = _XCR_XFEATURE_ENABLED_MASK;
let xcr: u64 = xsave::_xgetbv(xcr_n);
let xcr: u64 = _xgetbv(xcr_n);
// FIXME: XSETBV is a privileged instruction we should only test this
// when running in privileged mode:
//
// _xsetbv(xcr_n, xcr);
let xcr_cpy: u64 = xsave::_xgetbv(xcr_n);
let xcr_cpy: u64 = _xgetbv(xcr_n);
assert_eq!(xcr, xcr_cpy);
}
@ -222,9 +222,9 @@ mod tests {
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
xsave::_xsaveopt(a.ptr(), m);
xsave::_xrstor(a.ptr(), m);
xsave::_xsaveopt(b.ptr(), m);
_xsaveopt(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsaveopt(b.ptr(), m);
assert_eq!(a, b);
}
*/
@ -237,9 +237,9 @@ mod tests {
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
xsave::_xsavec(a.ptr(), m);
xsave::_xrstor(a.ptr(), m);
xsave::_xsavec(b.ptr(), m);
_xsavec(a.ptr(), m);
_xrstor(a.ptr(), m);
_xsavec(b.ptr(), m);
assert_eq!(a, b);
}
@ -251,9 +251,9 @@ mod tests {
let mut a = XsaveArea::new();
let mut b = XsaveArea::new();
xsave::_xsaves(a.ptr(), m);
xsave::_xrstors(a.ptr(), m);
xsave::_xsaves(b.ptr(), m);
_xsaves(a.ptr(), m);
_xrstors(a.ptr(), m);
_xsaves(b.ptr(), m);
assert_eq!(a, b);
}
*/

View file

@ -42,7 +42,7 @@ pub unsafe fn _popcnt64(x: i64) -> i32 {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
#[simd_test = "lzcnt"]
unsafe fn test_lzcnt_u64() {

View file

@ -35,7 +35,7 @@ pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
#[simd_test = "avx2"]
unsafe fn test_mm256_extract_epi64() {

View file

@ -102,6 +102,7 @@ mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::x86_64::*;
#[simd_test = "bmi"]
unsafe fn test_bextr_u64() {

View file

@ -69,7 +69,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::x86_64::*;
#[simd_test = "bmi2"]
unsafe fn test_pext_u64() {

View file

@ -66,7 +66,7 @@ mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
#[simd_test = "sse"]
unsafe fn test_mm_cvtss_si64() {

View file

@ -117,7 +117,7 @@ mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
#[simd_test = "sse2"]
unsafe fn test_mm_cvtsd_si64() {

View file

@ -31,7 +31,7 @@ pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
#[cfg(test)]
mod tests {
use stdsimd_test::simd_test;
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
#[simd_test = "sse4.1"]
unsafe fn test_mm_extract_epi64() {

View file

@ -20,7 +20,7 @@ pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
#[cfg(test)]
mod tests {
use coresimd::x86::*;
use coresimd::arch::x86_64::*;
use stdsimd_test::simd_test;

View file

@ -22,10 +22,10 @@ macro_rules! my_quote {
pub fn x86_functions(input: TokenStream) -> TokenStream {
let dir = Path::new(env!("CARGO_MANIFEST_DIR"));
let root = dir.parent().unwrap();
let root = root.join("../coresimd/x86");
let mut files = Vec::new();
walk(&root, &mut files);
walk(&root.join("../coresimd/x86"), &mut files);
walk(&root.join("../coresimd/x86_64"), &mut files);
assert!(files.len() > 0);
let mut functions = Vec::new();