Stabilize x86/x86_64 intrinsics (#414)

This commit stabilizes all intrinsics in the `x86` and `x86_64` modules, namely
allowing stabilization of the `arch::x86` and `arch::x86_64` module in libstd.
Stabilizations here were applied in an automated fashion using [this
script][scr], and notably everything related to `__m64` was omitted from this
round of stabilization

[scr]: https://gist.github.com/alexcrichton/5b456d495d6fe1df46a158754565c7a5
This commit is contained in:
Alex Crichton 2018-04-13 09:32:22 -05:00 committed by GitHub
parent b89963711d
commit f650b93003
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
42 changed files with 2897 additions and 15 deletions

View file

@ -20,8 +20,10 @@ pub use self::v7::*;
// NEON is supported on AArch64, and on ARM when built with the v7 and neon
// features. Building ARM without neon produces incorrect codegen.
#[cfg(any(target_arch = "aarch64",
all(target_feature = "v7", target_feature = "neon")))]
all(target_feature = "v7", target_feature = "neon"),
dox))]
mod neon;
#[cfg(any(target_arch = "aarch64",
all(target_feature = "v7", target_feature = "neon")))]
all(target_feature = "v7", target_feature = "neon"),
dox))]
pub use self::neon::*;

View file

@ -41,14 +41,16 @@ pub mod simd {
/// [`aarch64`]: https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/arch/index.html
/// [`mips`]: https://rust-lang-nursery.github.io/stdsimd/mips/stdsimd/arch/index.html
/// [`mips64`]: https://rust-lang-nursery.github.io/stdsimd/mips64/stdsimd/arch/index.html
#[unstable(feature = "stdsimd", issue = "0")]
#[stable(feature = "simd_arch", since = "1.27.0")]
pub mod arch {
/// Platform-specific intrinsics for the `x86` platform.
///
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86", dox))]
#[doc(cfg(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use coresimd::x86::*;
}
@ -57,8 +59,11 @@ pub mod arch {
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "x86_64", dox))]
#[doc(cfg(target_arch = "x86_64"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86_64 {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use coresimd::x86::*;
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use coresimd::x86_64::*;
}
@ -67,6 +72,7 @@ pub mod arch {
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "arm", dox))]
#[doc(cfg(target_arch = "arm"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod arm {
pub use coresimd::arm::*;
}
@ -76,6 +82,7 @@ pub mod arch {
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "aarch64", dox))]
#[doc(cfg(target_arch = "aarch64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod aarch64 {
pub use coresimd::aarch64::*;
pub use coresimd::arm::*;
@ -85,6 +92,7 @@ pub mod arch {
///
/// See the [module documentation](../index.html) for more details.
#[cfg(target_arch = "wasm32")]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod wasm32 {
pub use coresimd::wasm32::*;
}
@ -94,6 +102,7 @@ pub mod arch {
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips", dox))]
#[doc(cfg(target_arch = "mips"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod mips {
pub use coresimd::mips::*;
}
@ -103,6 +112,7 @@ pub mod arch {
/// See the [module documentation](../index.html) for more details.
#[cfg(any(target_arch = "mips64", dox))]
#[doc(cfg(target_arch = "mips64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod mips64 {
pub use coresimd::mips::*;
}

View file

@ -23,17 +23,23 @@ use stdsimd_test::assert_instr;
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32)
#[inline]
#[target_feature(enable = "lzcnt")]
#[cfg_attr(test, assert_instr(lzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
x.leading_zeros()
}
/// Counts the bits that are set.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32)
#[inline]
#[target_feature(enable = "popcnt")]
#[cfg_attr(test, assert_instr(popcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _popcnt32(x: i32) -> i32 {
x.count_ones() as i32
}

View file

@ -29,41 +29,56 @@ extern "C" {
}
/// Perform one round of an AES decryption flow on data (state) in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aesdec))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i {
aesdec(a, round_key)
}
/// Perform the last round of an AES decryption flow on data (state) in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aesdeclast))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
aesdeclast(a, round_key)
}
/// Perform one round of an AES encryption flow on data (state) in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aesenc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i {
aesenc(a, round_key)
}
/// Perform the last round of an AES encryption flow on data (state) in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aesenclast))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
aesenclast(a, round_key)
}
/// Perform the `InvMixColumns` transformation on `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aesimc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
aesimc(a)
}
@ -73,10 +88,13 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
/// Assist in expanding the AES cipher key by computing steps towards
/// generating a round key for encryption cipher using data from `a` and an
/// 8-bit round constant `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128)
#[inline]
#[target_feature(enable = "aes")]
#[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -14,9 +14,12 @@ use stdsimd_test::assert_instr;
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(bextr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
_bextr2_u32(
a,
@ -29,33 +32,45 @@ pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(bextr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
x86_bmi_bextr_32(a, control)
}
/// Bitwise logical `AND` of inverted `a` with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(andn))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 {
!a & b
}
/// Extract lowest set isolated bit.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsi))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsi_u32(x: u32) -> u32 {
x & x.wrapping_neg()
}
/// Get mask up to lowest set bit.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_sub(1_u32))
}
@ -63,9 +78,12 @@ pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
/// Resets the lowest set bit of `x`.
///
/// If `x` is sets CF.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsr))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsr_u32(x: u32) -> u32 {
x & (x.wrapping_sub(1))
}
@ -73,9 +91,12 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 {
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(tzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
x.trailing_zeros()
}
@ -83,9 +104,12 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(tzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 {
x.trailing_zeros() as i32
}

View file

@ -17,11 +17,14 @@ use stdsimd_test::assert_instr;
///
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
/// the low half and the high half of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u32)
#[inline]
// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))]
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(mulx))]
#[target_feature(enable = "bmi2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
let result: u64 = (a as u64) * (b as u64);
*hi = (result >> 32) as u32;
@ -29,27 +32,36 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
}
/// Zero higher bits of `a` >= `index`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(bzhi))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 {
x86_bmi2_bzhi_32(a, index)
}
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u32)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pdep))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 {
x86_bmi2_pdep_32(a, mask)
}
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
/// order bit positions of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u32)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pext))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 {
x86_bmi2_pext_32(a, mask)
}

View file

@ -6,8 +6,11 @@
use stdsimd_test::assert_instr;
/// Return an integer with the reversed byte order of x
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap)
#[inline]
#[cfg_attr(test, assert_instr(bswap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bswap(x: i32) -> i32 {
bswap_i32(x)
}

View file

@ -10,14 +10,19 @@ use stdsimd_test::assert_instr;
/// Result of the `cpuid` instruction.
#[derive(Copy, Clone, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct CpuidResult {
/// EAX register.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub eax: u32,
/// EBX register.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub ebx: u32,
/// ECX register.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub ecx: u32,
/// EDX register.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub edx: u32,
}
@ -46,6 +51,7 @@ pub struct CpuidResult {
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
#[inline]
#[cfg_attr(test, assert_instr(cpuid))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
let mut r = mem::uninitialized::<CpuidResult>();
if cfg!(target_arch = "x86") {
@ -66,6 +72,7 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
/// See [`__cpuid_count`](fn.__cpuid_count.html).
#[inline]
#[cfg_attr(test, assert_instr(cpuid))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __cpuid(leaf: u32) -> CpuidResult {
__cpuid_count(leaf, 0)
}
@ -114,6 +121,7 @@ pub fn has_cpuid() -> bool {
/// See also [`__cpuid`](fn.__cpuid.html) and
/// [`__cpuid_count`](fn.__cpuid_count.html).
#[inline]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
let CpuidResult { eax, ebx, .. } = __cpuid(leaf);
(eax, ebx)

View file

@ -1,8 +1,11 @@
//! `i386` intrinsics
/// Reads EFLAGS.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
#[cfg(target_arch = "x86")]
#[inline(always)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __readeflags() -> u32 {
let eflags: u32;
asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
@ -10,8 +13,11 @@ pub unsafe fn __readeflags() -> u32 {
}
/// Reads EFLAGS.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __readeflags() -> u64 {
let eflags: u64;
asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
@ -19,15 +25,21 @@ pub unsafe fn __readeflags() -> u64 {
}
/// Write EFLAGS.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
#[cfg(target_arch = "x86")]
#[inline(always)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __writeeflags(eflags: u32) {
asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
}
/// Write EFLAGS.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
#[cfg(target_arch = "x86_64")]
#[inline(always)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __writeeflags(eflags: u64) {
asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
}

View file

@ -21,9 +21,12 @@ extern "C" {
///
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave)
#[inline]
#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxsave))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _fxsave(mem_addr: *mut u8) {
fxsave(mem_addr)
}
@ -42,9 +45,12 @@ pub unsafe fn _fxsave(mem_addr: *mut u8) {
///
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor)
#[inline]
#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxrstor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _fxrstor(mem_addr: *const u8) {
fxrstor(mem_addr)
}

View file

@ -33,7 +33,8 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![feature(stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -83,7 +84,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -105,6 +106,7 @@ types! {
/// # if is_x86_feature_detected!("sse2") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m128i(i64, i64);
/// 128-bit wide set of four `f32` types, x86-specific
@ -126,7 +128,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -148,6 +150,7 @@ types! {
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m128(f32, f32, f32, f32);
/// 128-bit wide set of two `f64` types, x86-specific
@ -169,7 +172,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -191,6 +194,7 @@ types! {
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m128d(f64, f64);
/// 256-bit wide integer vector type, x86-specific
@ -216,7 +220,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -238,6 +242,7 @@ types! {
/// # if is_x86_feature_detected!("avx") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m256i(i64, i64, i64, i64);
/// 256-bit wide set of eight `f32` types, x86-specific
@ -259,7 +264,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -281,6 +286,7 @@ types! {
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
/// 256-bit wide set of four `f64` types, x86-specific
@ -302,7 +308,7 @@ types! {
/// # Examples
///
/// ```
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
/// # #![cfg_attr(not(dox), no_std)]
/// # #[cfg(not(dox))]
/// # extern crate std as real_std;
@ -324,6 +330,7 @@ types! {
/// # if is_x86_feature_detected!("avx") { unsafe { foo() } }
/// # }
/// ```
#[stable(feature = "simd_x86", since = "1.27.0")]
pub struct __m256d(f64, f64, f64, f64);
}
@ -334,6 +341,7 @@ pub use self::test::*;
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub(crate) trait m128iExt: Sized {
fn as_m128i(self) -> __m128i;
@ -387,6 +395,7 @@ impl m128iExt for __m128i {
#[doc(hidden)]
#[allow(non_camel_case_types)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub(crate) trait m256iExt: Sized {
fn as_m256i(self) -> __m256i;
@ -590,8 +599,8 @@ pub use self::avx2::*;
mod abm;
pub use self::abm::*;
mod bmi;
pub use self::bmi::*;
mod bmi1;
pub use self::bmi1::*;
mod bmi2;
pub use self::bmi2::*;

View file

@ -21,6 +21,8 @@ extern "C" {
///
/// The immediate byte is used for determining which halves of `a` and `b`
/// should be used. Immediate bits other than 0 and 4 are ignored.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128)
#[inline]
#[target_feature(enable = "pclmulqdq")]
#[cfg_attr(all(test, not(target_os = "linux")),
@ -34,6 +36,7 @@ extern "C" {
#[cfg_attr(all(test, target_os = "linux"),
assert_instr(pclmulhqhqdq, imm8 = 17))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clmulepi64_si128(
a: __m128i, b: __m128i, imm8: i32
) -> __m128i {

View file

@ -14,10 +14,13 @@ use stdsimd_test::assert_instr;
/// Read a hardware generated 16-bit random value and store the result in val.
/// Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step)
#[inline]
#[target_feature(enable = "rdrand")]
#[cfg_attr(test, assert_instr(rdrand))]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 {
let (v, flag) = x86_rdrand16_step();
*val = v;
@ -26,10 +29,13 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 {
/// Read a hardware generated 32-bit random value and store the result in val.
/// Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step)
#[inline]
#[target_feature(enable = "rdrand")]
#[cfg_attr(test, assert_instr(rdrand))]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 {
let (v, flag) = x86_rdrand32_step();
*val = v;
@ -38,9 +44,12 @@ pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 {
/// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store
/// in val. Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed16_step)
#[inline]
#[target_feature(enable = "rdseed")]
#[cfg_attr(test, assert_instr(rdseed))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 {
let (v, flag) = x86_rdseed16_step();
*val = v;
@ -49,9 +58,12 @@ pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 {
/// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store
/// in val. Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed32_step)
#[inline]
#[target_feature(enable = "rdseed")]
#[cfg_attr(test, assert_instr(rdseed))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 {
let (v, flag) = x86_rdseed32_step();
*val = v;

View file

@ -17,8 +17,11 @@ use stdsimd_test::assert_instr;
///
/// On processors that support the Intel 64 architecture, the
/// high-order 32 bits of each of RAX and RDX are cleared.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdtsc)
#[inline]
#[cfg_attr(test, assert_instr(rdtsc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdtsc() -> i64 {
rdtsc()
}
@ -37,8 +40,11 @@ pub unsafe fn _rdtsc() -> i64 {
///
/// On processors that support the Intel 64 architecture, the
/// high-order 32 bits of each of RAX, RDX, and RCX are cleared.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__rdtscp)
#[inline]
#[cfg_attr(test, assert_instr(rdtscp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn __rdtscp(aux: *mut u32) -> u64 {
rdtscp(aux as *mut _)
}

View file

@ -26,9 +26,12 @@ use stdsimd_test::assert_instr;
/// Perform an intermediate calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and returning the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg1_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4()))
}
@ -36,9 +39,12 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
/// Perform the final calculation for the next four SHA1 message values
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
/// previous message values in `b`, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4()))
}
@ -46,9 +52,12 @@ pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
/// Calculate SHA1 state variable E after four rounds of operation from the
/// current SHA1 state variable `a`, add that value to the scheduled values
/// (unsigned 32-bit integers) in `b`, and returns the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1nexte_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1nexte))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4()))
}
@ -58,10 +67,13 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
/// (unsigned 32-bit integers), and state variable E from `b`, and return the
/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round
/// constants.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha1rnds4_epu32(
a: __m128i, b: __m128i, func: i32
) -> __m128i {
@ -79,9 +91,12 @@ pub unsafe fn _mm_sha1rnds4_epu32(
/// Perform an intermediate calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and return the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg1_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg1))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4()))
}
@ -89,9 +104,12 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
/// Perform the final calculation for the next four SHA256 message values
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
/// and return the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256msg2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4()))
}
@ -101,9 +119,12 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
/// pre-computed sum of the next 2 round message values (unsigned 32-bit
/// integers) and the corresponding round constants from `k`, and store the
/// updated SHA256 state (A,B,E,F) in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256rnds2_epu32)
#[inline]
#[target_feature(enable = "sha")]
#[cfg_attr(test, assert_instr(sha256rnds2))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sha256rnds2_epu32(
a: __m128i, b: __m128i, k: __m128i
) -> __m128i {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -9,54 +9,72 @@ use stdsimd_test::assert_instr;
/// Alternatively add and subtract packed single-precision (32-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
addsubps(a, b)
}
/// Alternatively add and subtract packed double-precision (64-bit)
/// floating-point elements in `a` to/from packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(addsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
addsubpd(a, b)
}
/// Horizontally add adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
haddpd(a, b)
}
/// Horizontally add adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(haddps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
haddps(a, b)
}
/// Horizontally subtract adjacent pairs of double-precision (64-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
hsubpd(a, b)
}
/// Horizontally add adjacent pairs of single-precision (32-bit)
/// floating-point elements in `a` and `b`, and pack the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(hsubps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
hsubps(a, b)
}
@ -64,45 +82,60 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
/// Load 128-bits of integer data from unaligned memory.
/// This intrinsic may perform better than `_mm_loadu_si128`
/// when the data crosses a cache line boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(lddqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
mem::transmute(lddqu(mem_addr as *const _))
}
/// Duplicate the low double-precision (64-bit) floating-point element
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
simd_shuffle2(a, a, [0, 0])
}
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of return vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movddup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
_mm_load1_pd(mem_addr)
}
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movshdup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
simd_shuffle4(a, a, [1, 1, 3, 3])
}
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps)
#[inline]
#[target_feature(enable = "sse3")]
#[cfg_attr(test, assert_instr(movsldup))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
simd_shuffle4(a, a, [0, 0, 2, 2])
}

View file

@ -10,34 +10,47 @@ use stdsimd_test::assert_instr;
// SSE4 rounding constans
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 =
(_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF);
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 =
(_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF);
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO);
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 =
(_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION);
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 =
(_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION);
@ -46,9 +59,12 @@ pub const _MM_FROUND_NEARBYINT: i32 =
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `a` is selected. The element
/// of `b` is selected otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(
a: __m128i, b: __m128i, mask: __m128i
) -> __m128i {
@ -64,10 +80,13 @@ pub unsafe fn _mm_blendv_epi8(
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
let a = a.as_i16x8();
let b = b.as_i16x8();
@ -81,28 +100,37 @@ pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
blendvpd(a, b, mask)
}
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
blendvps(a, b, mask)
}
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `imm2`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
macro_rules! call {
($imm2:expr) => {
@ -114,10 +142,13 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `imm4`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
@ -129,11 +160,14 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
/// Extract a single-precision (32-bit) floating-point element from `a`,
/// selected with `imm8`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
}
@ -142,21 +176,27 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468][https://reviews.llvm.org/D20468].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
let imm8 = (imm8 & 15) as u32;
simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32
}
/// Extract an 32-bit integer from `a` selected with `imm8`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
let imm8 = (imm8 & 3) as u32;
simd_extract::<_, i32>(a.as_i32x4(), imm8)
@ -184,10 +224,13 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
/// element is cleared.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
@ -199,10 +242,13 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
mem::transmute(simd_insert(
a.as_i8x16(),
@ -213,10 +259,13 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
mem::transmute(simd_insert(
a.as_i32x4(),
@ -227,97 +276,130 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
}
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
/// maximum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
}
/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
}
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
}
/// Compare packed 8-bit integers in `a` and `b` and return packed minimum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
}
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
/// minimum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
}
/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
}
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pminud(a.as_u32x4(), b.as_u32x4()))
}
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}
/// Compare packed 64-bit integers in `a` and `b` for equality
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@ -325,9 +407,12 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
}
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
@ -336,9 +421,12 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
let a = a.as_i8x16();
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
@ -346,9 +434,12 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
}
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
let a = a.as_i16x8();
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
@ -356,9 +447,12 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
}
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
let a = a.as_i16x8();
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
@ -366,9 +460,12 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
}
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
let a = a.as_i32x4();
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
@ -376,9 +473,12 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
}
/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
@ -386,9 +486,12 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
}
/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
@ -396,9 +499,12 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
}
/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
let a = a.as_u8x16();
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
@ -407,9 +513,12 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
let a = a.as_u16x8();
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
@ -418,9 +527,12 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
/// Zero extend packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
let a = a.as_u16x8();
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
@ -429,9 +541,12 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
/// Zero extend packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
let a = a.as_u32x4();
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
@ -445,10 +560,13 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
macro_rules! call {
($imm8:expr) => {
@ -465,10 +583,13 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
@ -481,9 +602,12 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and store the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
roundpd(a, _MM_FROUND_FLOOR)
}
@ -491,9 +615,12 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and store the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
roundps(a, _MM_FROUND_FLOOR)
}
@ -503,9 +630,12 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
roundsd(a, b, _MM_FROUND_FLOOR)
}
@ -515,9 +645,12 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
roundss(a, b, _MM_FROUND_FLOOR)
}
@ -525,9 +658,12 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// up to an integer value, and store the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
roundpd(a, _MM_FROUND_CEIL)
}
@ -535,9 +671,12 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// up to an integer value, and store the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
roundps(a, _MM_FROUND_CEIL)
}
@ -547,9 +686,12 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
/// floating-point element in the lower element of the intrisic result,
/// and copy the upper element from `a` to the upper element
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
roundsd(a, b, _MM_FROUND_CEIL)
}
@ -559,9 +701,12 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
/// floating-point element in the lower element of the intrinsic result,
/// and copy the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
roundss(a, b, _MM_FROUND_CEIL)
}
@ -602,10 +747,13 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
@ -652,10 +800,13 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, rounding = 0))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
@ -703,10 +854,13 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
@ -754,10 +908,13 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
macro_rules! call {
($imm4:expr) => {
@ -786,18 +943,24 @@ pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
mem::transmute(phminposuw(a.as_u16x8()))
}
/// Multiply the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and return the signed 64-bit result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
}
@ -808,9 +971,12 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
/// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping
/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would
/// return a negative number.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
}
@ -846,10 +1012,13 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
///
/// * A `__m128i` vector containing the sums of the sets of
/// absolute differences between both operands.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
let a = a.as_u8x16();
let b = b.as_u8x16();
@ -874,9 +1043,12 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
ptestz(a.as_i64x2(), mask.as_i64x2())
}
@ -894,9 +1066,12 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestc(a.as_i64x2(), mask.as_i64x2())
}
@ -914,9 +1089,12 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
@ -934,9 +1112,12 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
_mm_testz_si128(a, mask)
}
@ -952,10 +1133,13 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
_mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
@ -973,9 +1157,12 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
_mm_testnzc_si128(a, mask)
}

View file

@ -10,49 +10,68 @@ use coresimd::simd_llvm::*;
use coresimd::x86::*;
/// String contains unsigned 8-bit characters *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
/// String contains unsigned 16-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001;
/// String contains signed 8-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010;
/// String contains unsigned 16-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011;
/// For each character in `a`, find if it is in `b` *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000;
/// For each character in `a`, determine if
/// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100;
/// The strings defined by `a` and `b` are equal
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000;
/// Search for the defined substring in the target
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100;
/// Do not negate results *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000;
/// Negate results
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000;
/// Do not negate results before the end of the string
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000;
/// Negate results only before the end of the string
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000;
/// **Index only**: return the least significant bit *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000;
/// **Index only**: return the most significant bit
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000;
/// **Mask only**: return the bit mask
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_BIT_MASK: i32 = 0b0000_0000;
/// **Mask only**: return the byte mask
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return the generated mask.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -296,10 +315,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
/// [`_mm_cmpestri`]: fn._mm_cmpestri.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -314,10 +336,13 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return `1` if any character in `b` was null.
/// and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -332,10 +357,13 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return `1` if the resulting mask was non-zero,
/// and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -350,10 +378,13 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and returns `1` if any character in `a` was null,
/// and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -367,10 +398,13 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return bit `0` of the resulting bit mask.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -385,10 +419,13 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings with implicit lengths in `a` and `b` using the
/// control in `imm8`, and return `1` if `b` did not contain a null
/// character and the resulting mask was zero, and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
let a = a.as_i8x16();
let b = b.as_i8x16();
@ -402,10 +439,13 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return the generated mask.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrm(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> __m128i {
@ -506,10 +546,13 @@ pub unsafe fn _mm_cmpestrm(
/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestri(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -526,10 +569,13 @@ pub unsafe fn _mm_cmpestri(
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return `1` if any character in
/// `b` was null, and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrz(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -546,10 +592,13 @@ pub unsafe fn _mm_cmpestrz(
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return `1` if the resulting mask
/// was non-zero, and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrc(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -566,10 +615,13 @@ pub unsafe fn _mm_cmpestrc(
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return `1` if any character in
/// a was null, and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestrs(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -586,10 +638,13 @@ pub unsafe fn _mm_cmpestrs(
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
/// using the control in `imm8`, and return bit `0` of the resulting
/// bit mask.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestro(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -607,10 +662,13 @@ pub unsafe fn _mm_cmpestro(
/// using the control in `imm8`, and return `1` if `b` did not
/// contain a null character and the resulting mask was zero, and `0`
/// otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
#[rustc_args_required_const(4)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpestra(
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
) -> i32 {
@ -626,36 +684,48 @@ pub unsafe fn _mm_cmpestra(
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 8-bit integer `v`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
crc32_32_8(crc, v)
}
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 16-bit integer `v`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
crc32_32_16(crc, v)
}
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 32-bit integer `v`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
crc32_32_32(crc, v)
}
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
/// return the results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(pcmpgtq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}

View file

@ -36,6 +36,7 @@ extern "C" {
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(extrq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
}
@ -52,6 +53,7 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(insertq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
}
@ -60,6 +62,7 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(movntsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
movntsd(p, a);
}
@ -68,6 +71,7 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
#[inline]
#[target_feature(enable = "sse4a")]
#[cfg_attr(test, assert_instr(movntss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
movntss(p, a);
}

View file

@ -10,9 +10,12 @@ use stdsimd_test::assert_instr;
/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
mem::transmute(pabsb128(a.as_i8x16()))
}
@ -20,9 +23,12 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
/// Compute the absolute value of each of the packed 16-bit signed integers in
/// `a` and
/// return the 16-bit unsigned integer
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
mem::transmute(pabsw128(a.as_i16x8()))
}
@ -30,9 +36,12 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
/// Compute the absolute value of each of the packed 32-bit signed integers in
/// `a` and
/// return the 32-bit unsigned integer
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
mem::transmute(pabsd128(a.as_i32x4()))
}
@ -61,19 +70,25 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
/// r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
let n = n as u32;
// If palignr is shifting the pair of vectors more than the size of two
@ -141,9 +156,12 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}
@ -151,27 +169,36 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of [4 x i32].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [8 x i16].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}
@ -180,18 +207,24 @@ pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}
/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of [4 x i32].
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}
@ -201,9 +234,12 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
/// integer values contained in the second source operand, add pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}
@ -211,9 +247,12 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
/// Multiply packed 16-bit signed integer values, truncate the 32-bit
/// product to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits [16:1] to the destination.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}
@ -222,9 +261,12 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
/// integer in `b` is negative, and return the result.
/// Elements in result are zeroed out when the corresponding element in `b`
/// is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}
@ -233,9 +275,12 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
/// integer in `b` is negative, and return the results.
/// Elements in result are zeroed out when the corresponding element in `b`
/// is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}
@ -244,9 +289,12 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
/// integer in `b` is negative, and return the results.
/// Element in result are zeroed out when the corresponding element in `b`
/// is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}

View file

@ -70,6 +70,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcfill_u32(x: u32) -> u32 {
x & (x.wrapping_add(1))
}
@ -81,6 +82,7 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
x & (x.wrapping_add(1))
}
@ -91,6 +93,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blci_u32(x: u32) -> u32 {
x | !(x.wrapping_add(1))
}
@ -102,6 +105,7 @@ pub unsafe fn _blci_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blci))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blci_u64(x: u64) -> u64 {
x | !(x.wrapping_add(1))
}
@ -112,6 +116,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcic_u32(x: u32) -> u32 {
!x & (x.wrapping_add(1))
}
@ -123,6 +128,7 @@ pub unsafe fn _blcic_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcic_u64(x: u64) -> u64 {
!x & (x.wrapping_add(1))
}
@ -134,6 +140,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
x ^ (x.wrapping_add(1))
}
@ -146,6 +153,7 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_add(1))
}
@ -156,6 +164,7 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcs_u32(x: u32) -> u32 {
x | (x.wrapping_add(1))
}
@ -167,6 +176,7 @@ pub unsafe fn _blcs_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blcs))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blcs_u64(x: u64) -> u64 {
x | x.wrapping_add(1)
}
@ -177,6 +187,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsfill_u32(x: u32) -> u32 {
x | (x.wrapping_sub(1))
}
@ -188,6 +199,7 @@ pub unsafe fn _blsfill_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsfill))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
x | (x.wrapping_sub(1))
}
@ -198,6 +210,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsic_u32(x: u32) -> u32 {
!x | (x.wrapping_sub(1))
}
@ -209,6 +222,7 @@ pub unsafe fn _blsic_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(blsic))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsic_u64(x: u64) -> u64 {
!x | (x.wrapping_sub(1))
}
@ -220,6 +234,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
!x | (x.wrapping_add(1))
}
@ -232,6 +247,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(t1mskc))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
!x | (x.wrapping_add(1))
}
@ -243,6 +259,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
#[inline]
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
!x & (x.wrapping_sub(1))
}
@ -255,6 +272,7 @@ pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
#[target_feature(enable = "tbm")]
#[cfg_attr(test, assert_instr(tzmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
!x & (x.wrapping_sub(1))
}

View file

@ -31,9 +31,12 @@ extern "C" {
///
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
/// Intel® 64 and IA-32 Architectures Software Developers Manual, Volume 1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsave))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
xsave(
mem_addr,
@ -48,9 +51,12 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xrstor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
}
@ -58,24 +64,31 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
/// `XFEATURE_ENABLED_MASK` for `XCR`
///
/// This intrinsic maps to `XSETBV` instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0;
/// Copy 64-bits from `val` to the extended control register (`XCR`) specified
/// by `a`.
///
/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsetbv)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsetbv))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsetbv(a: u32, val: u64) {
xsetbv(a, (val >> 32) as u32, val as u32);
}
/// Reads the contents of the extended control register `XCR`
/// specified in `xcr_no`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xgetbv)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xgetbv))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
let eax: u32;
let edx: u32;
@ -90,9 +103,12 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize
/// the manner in which data is saved. The performance of this instruction will
/// be equal to or better than using the `XSAVE` instruction.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt)
#[inline]
#[target_feature(enable = "xsave,xsaveopt")]
#[cfg_attr(test, assert_instr(xsaveopt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
xsaveopt(
mem_addr,
@ -107,9 +123,12 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
/// `xsavec` differs from `xsave` in that it uses compaction and that it may
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec)
#[inline]
#[target_feature(enable = "xsave,xsavec")]
#[cfg_attr(test, assert_instr(xsavec))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
xsavec(
mem_addr,
@ -125,9 +144,12 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves)
#[inline]
#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xsaves))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
xsaves(
mem_addr,
@ -145,9 +167,12 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors)
#[inline]
#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xrstors))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
}

View file

@ -23,17 +23,23 @@ use stdsimd_test::assert_instr;
/// Counts the leading most significant zero bits.
///
/// When the operand is zero, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64)
#[inline]
#[target_feature(enable = "lzcnt")]
#[cfg_attr(test, assert_instr(lzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
x.leading_zeros() as u64
}
/// Counts the bits that are set.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64)
#[inline]
#[target_feature(enable = "popcnt")]
#[cfg_attr(test, assert_instr(popcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _popcnt64(x: i64) -> i32 {
x.count_ones() as i32
}

View file

@ -19,10 +19,13 @@ use mem;
/// Copy `a` to result, and insert the 64-bit integer `i` into result
/// at the location specified by `index`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64)
#[inline]
#[rustc_args_required_const(2)]
#[target_feature(enable = "avx")]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
mem::transmute(simd_insert(a.as_i64x4(), (index as u32) & 3, i))
}

View file

@ -22,10 +22,13 @@ use coresimd::simd_llvm::*;
use coresimd::x86::*;
/// Extract a 64-bit integer from `a`, selected with `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64)
#[inline]
#[target_feature(enable = "avx2")]
#[rustc_args_required_const(1)]
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
let imm8 = (imm8 & 3) as u32;
simd_extract(a.as_i64x4(), imm8)

View file

@ -14,10 +14,13 @@ use stdsimd_test::assert_instr;
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
/// the least significant bits of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(bextr))]
#[cfg(not(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
_bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64)
}
@ -27,36 +30,48 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
///
/// Bits [7,0] of `control` specify the index to the first bit in the range to
/// be extracted, and bits [15,8] specify the length of the range.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(bextr))]
#[cfg(not(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
x86_bmi_bextr_64(a, control)
}
/// Bitwise logical `AND` of inverted `a` with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(andn))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
!a & b
}
/// Extract lowest set isolated bit.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsi))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsi_u64(x: u64) -> u64 {
x & x.wrapping_neg()
}
/// Get mask up to lowest set bit.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsmsk))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
x ^ (x.wrapping_sub(1_u64))
}
@ -64,10 +79,13 @@ pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
/// Resets the lowest set bit of `x`.
///
/// If `x` is sets CF.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(blsr))]
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _blsr_u64(x: u64) -> u64 {
x & (x.wrapping_sub(1))
}
@ -75,9 +93,12 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 {
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(tzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
x.trailing_zeros() as u64
}
@ -85,9 +106,12 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
/// Counts the number of trailing least significant zero bits.
///
/// When the source operand is 0, it returns its size in bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64)
#[inline]
#[target_feature(enable = "bmi1")]
#[cfg_attr(test, assert_instr(tzcnt))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
x.trailing_zeros() as i64
}

View file

@ -17,10 +17,13 @@ use stdsimd_test::assert_instr;
///
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
/// the low half and the high half of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64)
#[inline]
#[cfg_attr(test, assert_instr(mulx))]
#[target_feature(enable = "bmi2")]
#[cfg(not(target_arch = "x86"))] // calls an intrinsic
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
let result: u128 = (a as u128) * (b as u128);
*hi = (result >> 64) as u64;
@ -28,30 +31,39 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
}
/// Zero higher bits of `a` >= `index`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(bzhi))]
#[cfg(not(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
x86_bmi2_bzhi_64(a, index as u64)
}
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pdep))]
#[cfg(not(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
x86_bmi2_pdep_64(a, mask)
}
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
/// order bit positions of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pext))]
#[cfg(not(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
x86_bmi2_pext_64(a, mask)
}

View file

@ -6,8 +6,11 @@
use stdsimd_test::assert_instr;
/// Return an integer with the reversed byte order of x
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64)
#[inline]
#[cfg_attr(test, assert_instr(bswap))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bswap64(x: i64) -> i64 {
bswap_i64(x)
}

View file

@ -21,9 +21,12 @@ extern "C" {
///
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64)
#[inline]
#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxsave64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _fxsave64(mem_addr: *mut u8) {
fxsave64(mem_addr)
}
@ -42,9 +45,12 @@ pub unsafe fn _fxsave64(mem_addr: *mut u8) {
///
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64)
#[inline]
#[target_feature(enable = "fxsr")]
#[cfg_attr(test, assert_instr(fxrstor64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _fxrstor64(mem_addr: *const u8) {
fxrstor64(mem_addr)
}

View file

@ -12,10 +12,13 @@ use stdsimd_test::assert_instr;
/// Read a hardware generated 64-bit random value and store the result in val.
/// Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step)
#[inline]
#[target_feature(enable = "rdrand")]
#[cfg_attr(test, assert_instr(rdrand))]
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
let (v, flag) = x86_rdrand64_step();
*val = v;
@ -24,9 +27,12 @@ pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
/// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store
/// in val. Return 1 if a random value was generated, and 0 otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step)
#[inline]
#[target_feature(enable = "rdseed")]
#[cfg_attr(test, assert_instr(rdseed))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 {
let (v, flag) = x86_rdseed64_step();
*val = v;

View file

@ -24,9 +24,12 @@ extern "C" {
/// [`_mm_setcsr`](fn._mm_setcsr.html)).
///
/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64)
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
cvtss2si64(a)
}
@ -40,9 +43,12 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
/// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
///
/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64)
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvttss2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
cvttss2si64(a)
}
@ -52,9 +58,12 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
///
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
/// input).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss)
#[inline]
#[target_feature(enable = "sse")]
#[cfg_attr(test, assert_instr(cvtsi2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 {
cvtsi642ss(a, b)
}

View file

@ -17,34 +17,46 @@ extern "C" {
/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 64-bit integer.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 {
cvtsd2si64(a)
}
/// Alias for `_mm_cvtsd_si64`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 {
_mm_cvtsd_si64(a)
}
/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 64-bit integer with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 {
cvttsd2si64(a)
}
/// Alias for `_mm_cvttsd_si64`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
_mm_cvttsd_si64(a)
}
@ -52,61 +64,82 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
/// Stores a 64-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
intrinsics::nontemporal_store(mem_addr, a);
}
/// Return a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i {
_mm_set_epi64x(0, a)
}
/// Return a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
_mm_cvtsi64_si128(a)
}
/// Return the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
simd_extract(a.as_i64x2(), 0)
}
/// Return the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
_mm_cvtsi128_si64(a)
}
/// Return `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
simd_insert(a, 0, b as f64)
}
/// Return `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
_mm_cvtsi64_sd(a, b)
}

View file

@ -8,11 +8,14 @@ use mem;
use stdsimd_test::assert_instr;
/// Extract an 64-bit integer from `a` selected with `imm8`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
// TODO: Add test for Windows
#[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
let imm8 = (imm8 & 1) as u32;
simd_extract(a.as_i64x2(), imm8)
@ -20,10 +23,13 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a
/// location specified by `imm8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
mem::transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i))
}

View file

@ -11,9 +11,12 @@ extern "C" {
/// Starting with the initial value in `crc`, return the accumulated
/// CRC32 value for unsigned 64-bit integer `v`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64)
#[inline]
#[target_feature(enable = "sse4.2")]
#[cfg_attr(test, assert_instr(crc32))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
crc32_64_64(crc, v)
}

View file

@ -29,9 +29,12 @@ extern "C" {
///
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
/// Intel® 64 and IA-32 Architectures Software Developers Manual, Volume 1.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xsave64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
xsave64(
mem_addr,
@ -46,9 +49,12 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64)
#[inline]
#[target_feature(enable = "xsave")]
#[cfg_attr(test, assert_instr(xrstor64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
}
@ -60,9 +66,12 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize
/// the manner in which data is saved. The performance of this instruction will
/// be equal to or better than using the `XSAVE64` instruction.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64)
#[inline]
#[target_feature(enable = "xsave,xsaveopt")]
#[cfg_attr(test, assert_instr(xsaveopt64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
xsaveopt64(
mem_addr,
@ -77,9 +86,12 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
/// `xsavec` differs from `xsave` in that it uses compaction and that it may
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64)
#[inline]
#[target_feature(enable = "xsave,xsavec")]
#[cfg_attr(test, assert_instr(xsavec64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
xsavec64(
mem_addr,
@ -95,9 +107,12 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64)
#[inline]
#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xsaves64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
xsaves64(
mem_addr,
@ -115,9 +130,12 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
/// boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64)
#[inline]
#[target_feature(enable = "xsave,xsaves")]
#[cfg_attr(test, assert_instr(xrstors64))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
}

View file

@ -8,7 +8,7 @@
//! [stdsimd]: https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/
#![feature(const_fn, integer_atomics, staged_api, stdsimd)]
#![feature(cfg_target_feature, doc_cfg)]
#![feature(cfg_target_feature, doc_cfg, allow_internal_unstable)]
#![cfg_attr(feature = "cargo-clippy", allow(shadow_reuse))]
#![cfg_attr(target_os = "linux", feature(linkage))]
#![no_std]

View file

@ -16,8 +16,70 @@
//! in a global `AtomicUsize` variable. The query is performed by just checking
//! whether the feature bit in this global variable is set or cleared.
/// A macro to test at *runtime* whether a CPU feature is available on
/// x86/x86-64 platforms.
///
/// This macro is provided in the standard library and will detect at runtime
/// whether the specified CPU feature is detected. This does *not* resolve at
/// compile time unless the specified feature is already enabled for the entire
/// crate. Runtime detection currently relies mostly on the `cpuid` instruction.
///
/// This macro only takes one argument which is a string literal of the feature
/// being tested for. The feature names supported are the lowercase versions of
/// the ones defined by Intel in [their documentation][docs].
///
/// ## Supported arguments
///
/// This macro supports the same names that `#[target_feature]` supports. Unlike
/// `#[target_feature]`, however, this macro does not support names separated
/// with a comma. Instead testing for multiple features must be done through
/// separate macro invocations for now.
///
/// Supported arguments are:
///
/// * `"aes"`
/// * `"pclmulqdq"`
/// * `"rdrand"`
/// * `"rdseed"`
/// * `"tsc"`
/// * `"mmx"`
/// * `"sse"`
/// * `"sse2"`
/// * `"sse3"`
/// * `"ssse3"`
/// * `"sse4.1"`
/// * `"sse4.2"`
/// * `"sse4a"`
/// * `"sha"`
/// * `"avx"`
/// * `"avx2"`
/// * `"avx512f"`
/// * `"avx512cd"`
/// * `"avx512er"`
/// * `"avx512pf"`
/// * `"avx512bw"`
/// * `"avx512dq"`
/// * `"avx512vl"`
/// * `"avx512ifma"`
/// * `"avx512vbmi"`
/// * `"avx512vpopcntdq"`
/// * `"fma"`
/// * `"bmi1"`
/// * `"bmi2"`
/// * `"abm"`
/// * `"lzcnt"`
/// * `"tbm"`
/// * `"popcnt"`
/// * `"fxsr"`
/// * `"xsave"`
/// * `"xsaveopt"`
/// * `"xsaves"`
/// * `"xsavec"`
///
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
#[macro_export]
#[unstable(feature = "stdsimd", issue = "0")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow_internal_unstable]
macro_rules! is_x86_feature_detected {
("aes") => {
cfg!(target_feature = "aes") || $crate::arch::detect::check_for(

View file

@ -343,30 +343,38 @@
/// }
/// }
/// ```
#[unstable(feature = "stdsimd", issue = "0")]
#[stable(feature = "simd_arch", since = "1.27.0")]
pub mod arch {
#[cfg(all(not(dox), target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use coresimd::arch::x86;
#[cfg(all(not(dox), target_arch = "x86_64"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub use coresimd::arch::x86_64;
#[cfg(all(not(dox), target_arch = "arm"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub use coresimd::arch::arm;
#[cfg(all(not(dox), target_arch = "aarch64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub use coresimd::arch::aarch64;
#[cfg(target_arch = "wasm32")]
#[unstable(feature = "stdsimd", issue = "0")]
pub use coresimd::arch::wasm32;
#[cfg(all(not(dox), target_arch = "mips"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub use coresimd::arch::mips;
#[cfg(all(not(dox), target_arch = "mips64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub use coresimd::arch::mips64;
#[doc(hidden)] // unstable implementation detail
#[unstable(feature = "stdsimd", issue = "0")]
pub mod detect;
/// Platform-specific intrinsics for the `x86` platform.
@ -378,6 +386,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/x86/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "x86"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86 {}
/// Platform-specific intrinsics for the `x86_64` platform.
@ -389,6 +398,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/x86_64/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "x86_64"))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub mod x86_64 {}
/// Platform-specific intrinsics for the `arm` platform.
@ -400,6 +410,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/arm/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "arm"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod arm {}
/// Platform-specific intrinsics for the `aarch64` platform.
@ -411,6 +422,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/aarch64/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "aarch64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod aarch64 {}
/// Platform-specific intrinsics for the `mips` platform.
@ -422,6 +434,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/mips/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "mips"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod mips {}
/// Platform-specific intrinsics for the `mips64` platform.
@ -433,6 +446,7 @@ pub mod arch {
/// [libcore]: ../../../core/arch/mips64/index.html
#[cfg(dox)]
#[doc(cfg(target_arch = "mips64"))]
#[unstable(feature = "stdsimd", issue = "0")]
pub mod mips64 {}
}