Stabilize x86/x86_64 intrinsics (#414)
This commit stabilizes all intrinsics in the `x86` and `x86_64` modules, namely allowing stabilization of the `arch::x86` and `arch::x86_64` module in libstd. Stabilizations here were applied in an automated fashion using [this script][scr], and notably everything related to `__m64` was omitted from this round of stabilization [scr]: https://gist.github.com/alexcrichton/5b456d495d6fe1df46a158754565c7a5
This commit is contained in:
parent
b89963711d
commit
f650b93003
42 changed files with 2897 additions and 15 deletions
|
|
@ -20,8 +20,10 @@ pub use self::v7::*;
|
|||
// NEON is supported on AArch64, and on ARM when built with the v7 and neon
|
||||
// features. Building ARM without neon produces incorrect codegen.
|
||||
#[cfg(any(target_arch = "aarch64",
|
||||
all(target_feature = "v7", target_feature = "neon")))]
|
||||
all(target_feature = "v7", target_feature = "neon"),
|
||||
dox))]
|
||||
mod neon;
|
||||
#[cfg(any(target_arch = "aarch64",
|
||||
all(target_feature = "v7", target_feature = "neon")))]
|
||||
all(target_feature = "v7", target_feature = "neon"),
|
||||
dox))]
|
||||
pub use self::neon::*;
|
||||
|
|
|
|||
|
|
@ -41,14 +41,16 @@ pub mod simd {
|
|||
/// [`aarch64`]: https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/arch/index.html
|
||||
/// [`mips`]: https://rust-lang-nursery.github.io/stdsimd/mips/stdsimd/arch/index.html
|
||||
/// [`mips64`]: https://rust-lang-nursery.github.io/stdsimd/mips64/stdsimd/arch/index.html
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
#[stable(feature = "simd_arch", since = "1.27.0")]
|
||||
pub mod arch {
|
||||
/// Platform-specific intrinsics for the `x86` platform.
|
||||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86", dox))]
|
||||
#[doc(cfg(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use coresimd::x86::*;
|
||||
}
|
||||
|
||||
|
|
@ -57,8 +59,11 @@ pub mod arch {
|
|||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "x86_64", dox))]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86_64 {
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use coresimd::x86::*;
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use coresimd::x86_64::*;
|
||||
}
|
||||
|
||||
|
|
@ -67,6 +72,7 @@ pub mod arch {
|
|||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "arm", dox))]
|
||||
#[doc(cfg(target_arch = "arm"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod arm {
|
||||
pub use coresimd::arm::*;
|
||||
}
|
||||
|
|
@ -76,6 +82,7 @@ pub mod arch {
|
|||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "aarch64", dox))]
|
||||
#[doc(cfg(target_arch = "aarch64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod aarch64 {
|
||||
pub use coresimd::aarch64::*;
|
||||
pub use coresimd::arm::*;
|
||||
|
|
@ -85,6 +92,7 @@ pub mod arch {
|
|||
///
|
||||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod wasm32 {
|
||||
pub use coresimd::wasm32::*;
|
||||
}
|
||||
|
|
@ -94,6 +102,7 @@ pub mod arch {
|
|||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips", dox))]
|
||||
#[doc(cfg(target_arch = "mips"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod mips {
|
||||
pub use coresimd::mips::*;
|
||||
}
|
||||
|
|
@ -103,6 +112,7 @@ pub mod arch {
|
|||
/// See the [module documentation](../index.html) for more details.
|
||||
#[cfg(any(target_arch = "mips64", dox))]
|
||||
#[doc(cfg(target_arch = "mips64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod mips64 {
|
||||
pub use coresimd::mips::*;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,17 +23,23 @@ use stdsimd_test::assert_instr;
|
|||
/// Counts the leading most significant zero bits.
|
||||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "lzcnt")]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _lzcnt_u32(x: u32) -> u32 {
|
||||
x.leading_zeros()
|
||||
}
|
||||
|
||||
/// Counts the bits that are set.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "popcnt")]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _popcnt32(x: i32) -> i32 {
|
||||
x.count_ones() as i32
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,41 +29,56 @@ extern "C" {
|
|||
}
|
||||
|
||||
/// Perform one round of an AES decryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdec_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesdec))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aesdec_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
||||
aesdec(a, round_key)
|
||||
}
|
||||
|
||||
/// Perform the last round of an AES decryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesdeclast_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesdeclast))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aesdeclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
||||
aesdeclast(a, round_key)
|
||||
}
|
||||
|
||||
/// Perform one round of an AES encryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesenc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aesenc_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
||||
aesenc(a, round_key)
|
||||
}
|
||||
|
||||
/// Perform the last round of an AES encryption flow on data (state) in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesenclast_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesenclast))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aesenclast_si128(a: __m128i, round_key: __m128i) -> __m128i {
|
||||
aesenclast(a, round_key)
|
||||
}
|
||||
|
||||
/// Perform the `InvMixColumns` transformation on `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aesimc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aesimc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
|
||||
aesimc(a)
|
||||
}
|
||||
|
|
@ -73,10 +88,13 @@ pub unsafe fn _mm_aesimc_si128(a: __m128i) -> __m128i {
|
|||
/// Assist in expanding the AES cipher key by computing steps towards
|
||||
/// generating a round key for encryption cipher using data from `a` and an
|
||||
/// 8-bit round constant `imm8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_aeskeygenassist_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "aes")]
|
||||
#[cfg_attr(test, assert_instr(aeskeygenassist, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_aeskeygenassist_si128(a: __m128i, imm8: i32) -> __m128i {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -14,9 +14,12 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
||||
_bextr2_u32(
|
||||
a,
|
||||
|
|
@ -29,33 +32,45 @@ pub unsafe fn _bextr_u32(a: u32, start: u32, len: u32) -> u32 {
|
|||
///
|
||||
/// Bits [7,0] of `control` specify the index to the first bit in the range to
|
||||
/// be extracted, and bits [15,8] specify the length of the range.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bextr2_u32(a: u32, control: u32) -> u32 {
|
||||
x86_bmi_bextr_32(a, control)
|
||||
}
|
||||
|
||||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _andn_u32(a: u32, b: u32) -> u32 {
|
||||
!a & b
|
||||
}
|
||||
|
||||
/// Extract lowest set isolated bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsi_u32(x: u32) -> u32 {
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
|
||||
/// Get mask up to lowest set bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
|
||||
x ^ (x.wrapping_sub(1_u32))
|
||||
}
|
||||
|
|
@ -63,9 +78,12 @@ pub unsafe fn _blsmsk_u32(x: u32) -> u32 {
|
|||
/// Resets the lowest set bit of `x`.
|
||||
///
|
||||
/// If `x` is sets CF.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsr_u32(x: u32) -> u32 {
|
||||
x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -73,9 +91,12 @@ pub unsafe fn _blsr_u32(x: u32) -> u32 {
|
|||
/// Counts the number of trailing least significant zero bits.
|
||||
///
|
||||
/// When the source operand is 0, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
|
||||
x.trailing_zeros()
|
||||
}
|
||||
|
|
@ -83,9 +104,12 @@ pub unsafe fn _tzcnt_u32(x: u32) -> u32 {
|
|||
/// Counts the number of trailing least significant zero bits.
|
||||
///
|
||||
/// When the source operand is 0, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_tzcnt_32(x: u32) -> i32 {
|
||||
x.trailing_zeros() as i32
|
||||
}
|
||||
|
|
@ -17,11 +17,14 @@ use stdsimd_test::assert_instr;
|
|||
///
|
||||
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
|
||||
/// the low half and the high half of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u32)
|
||||
#[inline]
|
||||
// LLVM BUG (should be mulxl): https://bugs.llvm.org/show_bug.cgi?id=34232
|
||||
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(imul))]
|
||||
#[cfg_attr(all(test, target_arch = "x86"), assert_instr(mulx))]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
|
||||
let result: u64 = (a as u64) * (b as u64);
|
||||
*hi = (result >> 32) as u32;
|
||||
|
|
@ -29,27 +32,36 @@ pub unsafe fn _mulx_u32(a: u32, b: u32, hi: &mut u32) -> u32 {
|
|||
}
|
||||
|
||||
/// Zero higher bits of `a` >= `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bzhi_u32(a: u32, index: u32) -> u32 {
|
||||
x86_bmi2_bzhi_32(a, index)
|
||||
}
|
||||
|
||||
/// Scatter contiguous low order bits of `a` to the result at the positions
|
||||
/// specified by the `mask`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _pdep_u32(a: u32, mask: u32) -> u32 {
|
||||
x86_bmi2_pdep_32(a, mask)
|
||||
}
|
||||
|
||||
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
|
||||
/// order bit positions of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _pext_u32(a: u32, mask: u32) -> u32 {
|
||||
x86_bmi2_pext_32(a, mask)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,8 +6,11 @@
|
|||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Return an integer with the reversed byte order of x
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(bswap))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bswap(x: i32) -> i32 {
|
||||
bswap_i32(x)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,14 +10,19 @@ use stdsimd_test::assert_instr;
|
|||
/// Result of the `cpuid` instruction.
|
||||
#[derive(Copy, Clone, Eq, Ord, PartialEq, PartialOrd)]
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct CpuidResult {
|
||||
/// EAX register.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub eax: u32,
|
||||
/// EBX register.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub ebx: u32,
|
||||
/// ECX register.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub ecx: u32,
|
||||
/// EDX register.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub edx: u32,
|
||||
}
|
||||
|
||||
|
|
@ -46,6 +51,7 @@ pub struct CpuidResult {
|
|||
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(cpuid))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
|
||||
let mut r = mem::uninitialized::<CpuidResult>();
|
||||
if cfg!(target_arch = "x86") {
|
||||
|
|
@ -66,6 +72,7 @@ pub unsafe fn __cpuid_count(leaf: u32, sub_leaf: u32) -> CpuidResult {
|
|||
/// See [`__cpuid_count`](fn.__cpuid_count.html).
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(cpuid))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __cpuid(leaf: u32) -> CpuidResult {
|
||||
__cpuid_count(leaf, 0)
|
||||
}
|
||||
|
|
@ -114,6 +121,7 @@ pub fn has_cpuid() -> bool {
|
|||
/// See also [`__cpuid`](fn.__cpuid.html) and
|
||||
/// [`__cpuid_count`](fn.__cpuid_count.html).
|
||||
#[inline]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {
|
||||
let CpuidResult { eax, ebx, .. } = __cpuid(leaf);
|
||||
(eax, ebx)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
//! `i386` intrinsics
|
||||
|
||||
/// Reads EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __readeflags() -> u32 {
|
||||
let eflags: u32;
|
||||
asm!("pushfd; popl $0" : "=r"(eflags) : : : "volatile");
|
||||
|
|
@ -10,8 +13,11 @@ pub unsafe fn __readeflags() -> u32 {
|
|||
}
|
||||
|
||||
/// Reads EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__readeflags)
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __readeflags() -> u64 {
|
||||
let eflags: u64;
|
||||
asm!("pushfq; popq $0" : "=r"(eflags) : : : "volatile");
|
||||
|
|
@ -19,15 +25,21 @@ pub unsafe fn __readeflags() -> u64 {
|
|||
}
|
||||
|
||||
/// Write EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __writeeflags(eflags: u32) {
|
||||
asm!("pushl $0; popfd" : : "r"(eflags) : "cc", "flags" : "volatile");
|
||||
}
|
||||
|
||||
/// Write EFLAGS.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__writeeflags)
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[inline(always)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __writeeflags(eflags: u64) {
|
||||
asm!("pushq $0; popfq" : : "r"(eflags) : "cc", "flags" : "volatile");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,9 +21,12 @@ extern "C" {
|
|||
///
|
||||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxsave))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _fxsave(mem_addr: *mut u8) {
|
||||
fxsave(mem_addr)
|
||||
}
|
||||
|
|
@ -42,9 +45,12 @@ pub unsafe fn _fxsave(mem_addr: *mut u8) {
|
|||
///
|
||||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxrstor))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _fxrstor(mem_addr: *const u8) {
|
||||
fxrstor(mem_addr)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,8 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![feature(stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -83,7 +84,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -105,6 +106,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("sse2") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m128i(i64, i64);
|
||||
|
||||
/// 128-bit wide set of four `f32` types, x86-specific
|
||||
|
|
@ -126,7 +128,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -148,6 +150,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m128(f32, f32, f32, f32);
|
||||
|
||||
/// 128-bit wide set of two `f64` types, x86-specific
|
||||
|
|
@ -169,7 +172,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -191,6 +194,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m128d(f64, f64);
|
||||
|
||||
/// 256-bit wide integer vector type, x86-specific
|
||||
|
|
@ -216,7 +220,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -238,6 +242,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("avx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m256i(i64, i64, i64, i64);
|
||||
|
||||
/// 256-bit wide set of eight `f32` types, x86-specific
|
||||
|
|
@ -259,7 +264,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -281,6 +286,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("sse") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m256(f32, f32, f32, f32, f32, f32, f32, f32);
|
||||
|
||||
/// 256-bit wide set of four `f64` types, x86-specific
|
||||
|
|
@ -302,7 +308,7 @@ types! {
|
|||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # #![feature(cfg_target_feature, target_feature, stdsimd)]
|
||||
/// # #![cfg_attr(not(dox), feature(cfg_target_feature, target_feature, stdsimd))]
|
||||
/// # #![cfg_attr(not(dox), no_std)]
|
||||
/// # #[cfg(not(dox))]
|
||||
/// # extern crate std as real_std;
|
||||
|
|
@ -324,6 +330,7 @@ types! {
|
|||
/// # if is_x86_feature_detected!("avx") { unsafe { foo() } }
|
||||
/// # }
|
||||
/// ```
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub struct __m256d(f64, f64, f64, f64);
|
||||
}
|
||||
|
||||
|
|
@ -334,6 +341,7 @@ pub use self::test::*;
|
|||
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub(crate) trait m128iExt: Sized {
|
||||
fn as_m128i(self) -> __m128i;
|
||||
|
||||
|
|
@ -387,6 +395,7 @@ impl m128iExt for __m128i {
|
|||
|
||||
#[doc(hidden)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub(crate) trait m256iExt: Sized {
|
||||
fn as_m256i(self) -> __m256i;
|
||||
|
||||
|
|
@ -590,8 +599,8 @@ pub use self::avx2::*;
|
|||
|
||||
mod abm;
|
||||
pub use self::abm::*;
|
||||
mod bmi;
|
||||
pub use self::bmi::*;
|
||||
mod bmi1;
|
||||
pub use self::bmi1::*;
|
||||
|
||||
mod bmi2;
|
||||
pub use self::bmi2::*;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ extern "C" {
|
|||
///
|
||||
/// The immediate byte is used for determining which halves of `a` and `b`
|
||||
/// should be used. Immediate bits other than 0 and 4 are ignored.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clmulepi64_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "pclmulqdq")]
|
||||
#[cfg_attr(all(test, not(target_os = "linux")),
|
||||
|
|
@ -34,6 +36,7 @@ extern "C" {
|
|||
#[cfg_attr(all(test, target_os = "linux"),
|
||||
assert_instr(pclmulhqhqdq, imm8 = 17))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_clmulepi64_si128(
|
||||
a: __m128i, b: __m128i, imm8: i32
|
||||
) -> __m128i {
|
||||
|
|
|
|||
|
|
@ -14,10 +14,13 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Read a hardware generated 16-bit random value and store the result in val.
|
||||
/// Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand16_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 {
|
||||
let (v, flag) = x86_rdrand16_step();
|
||||
*val = v;
|
||||
|
|
@ -26,10 +29,13 @@ pub unsafe fn _rdrand16_step(val: &mut u16) -> i32 {
|
|||
|
||||
/// Read a hardware generated 32-bit random value and store the result in val.
|
||||
/// Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand32_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 {
|
||||
let (v, flag) = x86_rdrand32_step();
|
||||
*val = v;
|
||||
|
|
@ -38,9 +44,12 @@ pub unsafe fn _rdrand32_step(val: &mut u32) -> i32 {
|
|||
|
||||
/// Read a 16-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed16_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 {
|
||||
let (v, flag) = x86_rdseed16_step();
|
||||
*val = v;
|
||||
|
|
@ -49,9 +58,12 @@ pub unsafe fn _rdseed16_step(val: &mut u16) -> i32 {
|
|||
|
||||
/// Read a 32-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed32_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdseed32_step(val: &mut u32) -> i32 {
|
||||
let (v, flag) = x86_rdseed32_step();
|
||||
*val = v;
|
||||
|
|
|
|||
|
|
@ -17,8 +17,11 @@ use stdsimd_test::assert_instr;
|
|||
///
|
||||
/// On processors that support the Intel 64 architecture, the
|
||||
/// high-order 32 bits of each of RAX and RDX are cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdtsc)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rdtsc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdtsc() -> i64 {
|
||||
rdtsc()
|
||||
}
|
||||
|
|
@ -37,8 +40,11 @@ pub unsafe fn _rdtsc() -> i64 {
|
|||
///
|
||||
/// On processors that support the Intel 64 architecture, the
|
||||
/// high-order 32 bits of each of RAX, RDX, and RCX are cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=__rdtscp)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rdtscp))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn __rdtscp(aux: *mut u32) -> u64 {
|
||||
rdtscp(aux as *mut _)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,12 @@ use stdsimd_test::assert_instr;
|
|||
/// Perform an intermediate calculation for the next four SHA1 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and returning the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg1_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg1))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -36,9 +39,12 @@ pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Perform the final calculation for the next four SHA1 message values
|
||||
/// (unsigned 32-bit integers) using the intermediate result in `a` and the
|
||||
/// previous message values in `b`, and returns the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1msg2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1msg2))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -46,9 +52,12 @@ pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Calculate SHA1 state variable E after four rounds of operation from the
|
||||
/// current SHA1 state variable `a`, add that value to the scheduled values
|
||||
/// (unsigned 32-bit integers) in `b`, and returns the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1nexte_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1nexte))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -58,10 +67,13 @@ pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// (unsigned 32-bit integers), and state variable E from `b`, and return the
|
||||
/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round
|
||||
/// constants.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha1rnds4_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha1rnds4_epu32(
|
||||
a: __m128i, b: __m128i, func: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -79,9 +91,12 @@ pub unsafe fn _mm_sha1rnds4_epu32(
|
|||
/// Perform an intermediate calculation for the next four SHA256 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg1_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg1))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -89,9 +104,12 @@ pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Perform the final calculation for the next four SHA256 message values
|
||||
/// (unsigned 32-bit integers) using previous message values from `a` and `b`,
|
||||
/// and return the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256msg2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256msg2))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -101,9 +119,12 @@ pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// pre-computed sum of the next 2 round message values (unsigned 32-bit
|
||||
/// integers) and the corresponding round constants from `k`, and store the
|
||||
/// updated SHA256 state (A,B,E,F) in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sha256rnds2_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sha")]
|
||||
#[cfg_attr(test, assert_instr(sha256rnds2))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sha256rnds2_epu32(
|
||||
a: __m128i, b: __m128i, k: __m128i
|
||||
) -> __m128i {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -9,54 +9,72 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Alternatively add and subtract packed single-precision (32-bit)
|
||||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(addsubps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
|
||||
addsubps(a, b)
|
||||
}
|
||||
|
||||
/// Alternatively add and subtract packed double-precision (64-bit)
|
||||
/// floating-point elements in `a` to/from packed elements in `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(addsubpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
addsubpd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add adjacent pairs of double-precision (64-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(haddpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
haddpd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add adjacent pairs of single-precision (32-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(haddps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
|
||||
haddps(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally subtract adjacent pairs of double-precision (64-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(hsubpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
|
||||
hsubpd(a, b)
|
||||
}
|
||||
|
||||
/// Horizontally add adjacent pairs of single-precision (32-bit)
|
||||
/// floating-point elements in `a` and `b`, and pack the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(hsubps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
|
||||
hsubps(a, b)
|
||||
}
|
||||
|
|
@ -64,45 +82,60 @@ pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
|
|||
/// Load 128-bits of integer data from unaligned memory.
|
||||
/// This intrinsic may perform better than `_mm_loadu_si128`
|
||||
/// when the data crosses a cache line boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(lddqu))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
|
||||
mem::transmute(lddqu(mem_addr as *const _))
|
||||
}
|
||||
|
||||
/// Duplicate the low double-precision (64-bit) floating-point element
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
|
||||
simd_shuffle2(a, a, [0, 0])
|
||||
}
|
||||
|
||||
/// Load a double-precision (64-bit) floating-point element from memory
|
||||
/// into both elements of return vector.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movddup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
|
||||
_mm_load1_pd(mem_addr)
|
||||
}
|
||||
|
||||
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movshdup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
|
||||
simd_shuffle4(a, a, [1, 1, 3, 3])
|
||||
}
|
||||
|
||||
/// Duplicate even-indexed single-precision (32-bit) floating-point elements
|
||||
/// from `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse3")]
|
||||
#[cfg_attr(test, assert_instr(movsldup))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
|
||||
simd_shuffle4(a, a, [0, 0, 2, 2])
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,34 +10,47 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
// SSE4 rounding constans
|
||||
/// round to nearest
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
|
||||
/// round down
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
|
||||
/// round up
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
|
||||
/// truncate
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
|
||||
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
|
||||
/// do not suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
|
||||
/// suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
|
||||
/// round to nearest and do not suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_NINT: i32 = 0x00;
|
||||
/// round down and do not suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_FLOOR: i32 =
|
||||
(_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF);
|
||||
/// round up and do not suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_CEIL: i32 =
|
||||
(_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF);
|
||||
/// truncate and do not suppress exceptions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_TRUNC: i32 = (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO);
|
||||
/// use MXCSR.RC and do not suppress exceptions; see
|
||||
/// `vendor::_MM_SET_ROUNDING_MODE`
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_RINT: i32 =
|
||||
(_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION);
|
||||
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _MM_FROUND_NEARBYINT: i32 =
|
||||
(_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION);
|
||||
|
||||
|
|
@ -46,9 +59,12 @@ pub const _MM_FROUND_NEARBYINT: i32 =
|
|||
/// The high bit of each corresponding mask byte determines the selection.
|
||||
/// If the high bit is set the element of `a` is selected. The element
|
||||
/// of `b` is selected otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pblendvb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blendv_epi8(
|
||||
a: __m128i, b: __m128i, mask: __m128i
|
||||
) -> __m128i {
|
||||
|
|
@ -64,10 +80,13 @@ pub unsafe fn _mm_blendv_epi8(
|
|||
/// The mask bits determine the selection. A clear bit selects the
|
||||
/// corresponding element of `a`, and a set bit the corresponding
|
||||
/// element of `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pblendw, imm8 = 0xF0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let b = b.as_i16x8();
|
||||
|
|
@ -81,28 +100,37 @@ pub unsafe fn _mm_blend_epi16(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||
|
||||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
|
||||
/// and `b` using `mask`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendvpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
|
||||
blendvpd(a, b, mask)
|
||||
}
|
||||
|
||||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
|
||||
/// and `b` using `mask`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendvps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
|
||||
blendvps(a, b, mask)
|
||||
}
|
||||
|
||||
/// Blend packed double-precision (64-bit) floating-point elements from `a`
|
||||
/// and `b` using control mask `imm2`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendpd, imm2 = 0b10))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm2:expr) => {
|
||||
|
|
@ -114,10 +142,13 @@ pub unsafe fn _mm_blend_pd(a: __m128d, b: __m128d, imm2: i32) -> __m128d {
|
|||
|
||||
/// Blend packed single-precision (32-bit) floating-point elements from `a`
|
||||
/// and `b` using mask `imm4`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(blendps, imm4 = 0b0101))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => {
|
||||
|
|
@ -129,11 +160,14 @@ pub unsafe fn _mm_blend_ps(a: __m128, b: __m128, imm4: i32) -> __m128 {
|
|||
|
||||
/// Extract a single-precision (32-bit) floating-point element from `a`,
|
||||
/// selected with `imm8`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(extractps, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
|
||||
mem::transmute(simd_extract::<_, f32>(a, imm8 as u32 & 0b11))
|
||||
}
|
||||
|
|
@ -142,21 +176,27 @@ pub unsafe fn _mm_extract_ps(a: __m128, imm8: i32) -> i32 {
|
|||
/// integer containing the zero-extended integer data.
|
||||
///
|
||||
/// See [LLVM commit D20468][https://reviews.llvm.org/D20468].
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_extract_epi8(a: __m128i, imm8: i32) -> i32 {
|
||||
let imm8 = (imm8 & 15) as u32;
|
||||
simd_extract::<_, u8>(a.as_u8x16(), imm8) as i32
|
||||
}
|
||||
|
||||
/// Extract an 32-bit integer from `a` selected with `imm8`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(extractps, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
|
||||
let imm8 = (imm8 & 3) as u32;
|
||||
simd_extract::<_, i32>(a.as_i32x4(), imm8)
|
||||
|
|
@ -184,10 +224,13 @@ pub unsafe fn _mm_extract_epi32(a: __m128i, imm8: i32) -> i32 {
|
|||
///
|
||||
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
|
||||
/// element is cleared.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(insertps, imm8 = 0b1010))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
@ -199,10 +242,13 @@ pub unsafe fn _mm_insert_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
|||
|
||||
/// Return a copy of `a` with the 8-bit integer from `i` inserted at a
|
||||
/// location specified by `imm8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrb, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(
|
||||
a.as_i8x16(),
|
||||
|
|
@ -213,10 +259,13 @@ pub unsafe fn _mm_insert_epi8(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
|||
|
||||
/// Return a copy of `a` with the 32-bit integer from `i` inserted at a
|
||||
/// location specified by `imm8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrd, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(
|
||||
a.as_i32x4(),
|
||||
|
|
@ -227,97 +276,130 @@ pub unsafe fn _mm_insert_epi32(a: __m128i, i: i32, imm8: i32) -> __m128i {
|
|||
|
||||
/// Compare packed 8-bit integers in `a` and `b` and return packed maximum
|
||||
/// values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
|
||||
/// maximum.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxuw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
|
||||
}
|
||||
|
||||
/// Compare packed 32-bit integers in `a` and `b`, and return packed maximum
|
||||
/// values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
|
||||
/// maximum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmaxud))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
|
||||
}
|
||||
|
||||
/// Compare packed 8-bit integers in `a` and `b` and return packed minimum
|
||||
/// values in dst.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminsb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 16-bit integers in `a` and `b`, and return packed
|
||||
/// minimum.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminuw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
|
||||
}
|
||||
|
||||
/// Compare packed 32-bit integers in `a` and `b`, and return packed minimum
|
||||
/// values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Compare packed unsigned 32-bit integers in `a` and `b`, and return packed
|
||||
/// minimum values.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pminud))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pminud(a.as_u32x4(), b.as_u32x4()))
|
||||
}
|
||||
|
||||
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
|
||||
/// using unsigned saturation
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(packusdw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Compare packed 64-bit integers in `a` and `b` for equality
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pcmpeqq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
|
||||
}
|
||||
|
||||
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle8::<_, i8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
|
|
@ -325,9 +407,12 @@ pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle4::<_, i8x4>(a, a, [0, 1, 2, 3]);
|
||||
|
|
@ -336,9 +421,12 @@ pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
|
||||
/// 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxbq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let a = simd_shuffle2::<_, i8x2>(a, a, [0, 1]);
|
||||
|
|
@ -346,9 +434,12 @@ pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle4::<_, i16x4>(a, a, [0, 1, 2, 3]);
|
||||
|
|
@ -356,9 +447,12 @@ pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxwq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i16x8();
|
||||
let a = simd_shuffle2::<_, i16x2>(a, a, [0, 1]);
|
||||
|
|
@ -366,9 +460,12 @@ pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovsxdq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_i32x4();
|
||||
let a = simd_shuffle2::<_, i32x2>(a, a, [0, 1]);
|
||||
|
|
@ -376,9 +473,12 @@ pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Zero extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle8::<_, u8x8>(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
|
||||
|
|
@ -386,9 +486,12 @@ pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Zero extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle4::<_, u8x4>(a, a, [0, 1, 2, 3]);
|
||||
|
|
@ -396,9 +499,12 @@ pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
|
|||
}
|
||||
|
||||
/// Zero extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxbq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let a = simd_shuffle2::<_, u8x2>(a, a, [0, 1]);
|
||||
|
|
@ -407,9 +513,12 @@ pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero extend packed unsigned 16-bit integers in `a`
|
||||
/// to packed 32-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxwd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle4::<_, u16x4>(a, a, [0, 1, 2, 3]);
|
||||
|
|
@ -418,9 +527,12 @@ pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero extend packed unsigned 16-bit integers in `a`
|
||||
/// to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxwq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u16x8();
|
||||
let a = simd_shuffle2::<_, u16x2>(a, a, [0, 1]);
|
||||
|
|
@ -429,9 +541,12 @@ pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
|
|||
|
||||
/// Zero extend packed unsigned 32-bit integers in `a`
|
||||
/// to packed 64-bit integers
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmovzxdq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
||||
let a = a.as_u32x4();
|
||||
let a = simd_shuffle2::<_, u32x2>(a, a, [0, 1]);
|
||||
|
|
@ -445,10 +560,13 @@ pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
|
|||
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
|
||||
/// the dot product will be stored in the return value component. Otherwise if
|
||||
/// the broadcast mask bit is zero then the return component will be zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dppd, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
@ -465,10 +583,13 @@ pub unsafe fn _mm_dp_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
|
|||
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
|
||||
/// the dot product will be stored in the return value component. Otherwise if
|
||||
/// the broadcast mask bit is zero then the return component will be zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(dpps, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm8:expr) => {
|
||||
|
|
@ -481,9 +602,12 @@ pub unsafe fn _mm_dp_ps(a: __m128, b: __m128, imm8: i32) -> __m128 {
|
|||
/// Round the packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// down to an integer value, and store the results as packed double-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
|
||||
roundpd(a, _MM_FROUND_FLOOR)
|
||||
}
|
||||
|
|
@ -491,9 +615,12 @@ pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
|
|||
/// Round the packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// down to an integer value, and store the results as packed single-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
|
||||
roundps(a, _MM_FROUND_FLOOR)
|
||||
}
|
||||
|
|
@ -503,9 +630,12 @@ pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
|
|||
/// floating-point element in the lower element of the intrinsic result,
|
||||
/// and copy the upper element from `a` to the upper element of the intrinsic
|
||||
/// result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
|
||||
roundsd(a, b, _MM_FROUND_FLOOR)
|
||||
}
|
||||
|
|
@ -515,9 +645,12 @@ pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// floating-point element in the lower element of the intrinsic result,
|
||||
/// and copy the upper 3 packed elements from `a` to the upper elements
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
|
||||
roundss(a, b, _MM_FROUND_FLOOR)
|
||||
}
|
||||
|
|
@ -525,9 +658,12 @@ pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// Round the packed double-precision (64-bit) floating-point elements in `a`
|
||||
/// up to an integer value, and store the results as packed double-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
|
||||
roundpd(a, _MM_FROUND_CEIL)
|
||||
}
|
||||
|
|
@ -535,9 +671,12 @@ pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
|
|||
/// Round the packed single-precision (32-bit) floating-point elements in `a`
|
||||
/// up to an integer value, and store the results as packed single-precision
|
||||
/// floating-point elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
|
||||
roundps(a, _MM_FROUND_CEIL)
|
||||
}
|
||||
|
|
@ -547,9 +686,12 @@ pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
|
|||
/// floating-point element in the lower element of the intrisic result,
|
||||
/// and copy the upper element from `a` to the upper element
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
|
||||
roundsd(a, b, _MM_FROUND_CEIL)
|
||||
}
|
||||
|
|
@ -559,9 +701,12 @@ pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
|
|||
/// floating-point element in the lower element of the intrinsic result,
|
||||
/// and copy the upper 3 packed elements from `a` to the upper elements
|
||||
/// of the intrinsic result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
||||
roundss(a, b, _MM_FROUND_CEIL)
|
||||
}
|
||||
|
|
@ -602,10 +747,13 @@ pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
|
|||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundpd, rounding = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => {
|
||||
|
|
@ -652,10 +800,13 @@ pub unsafe fn _mm_round_pd(a: __m128d, rounding: i32) -> __m128d {
|
|||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundps, rounding = 0))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => {
|
||||
|
|
@ -703,10 +854,13 @@ pub unsafe fn _mm_round_ps(a: __m128, rounding: i32) -> __m128 {
|
|||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundsd, rounding = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => {
|
||||
|
|
@ -754,10 +908,13 @@ pub unsafe fn _mm_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
|
|||
/// _MM_FROUND_CUR_DIRECTION;
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(roundss, rounding = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
|
||||
macro_rules! call {
|
||||
($imm4:expr) => {
|
||||
|
|
@ -786,18 +943,24 @@ pub unsafe fn _mm_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
|
|||
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
|
||||
/// * bits `[18:16]` - contain the index of the minimum value
|
||||
/// * remaining bits are set to `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(phminposuw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
|
||||
mem::transmute(phminposuw(a.as_u16x8()))
|
||||
}
|
||||
|
||||
/// Multiply the low 32-bit integers from each packed 64-bit
|
||||
/// element in `a` and `b`, and return the signed 64-bit result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmuldq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -808,9 +971,12 @@ pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping
|
||||
/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would
|
||||
/// return a negative number.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pmulld))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -846,10 +1012,13 @@ pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
///
|
||||
/// * A `__m128i` vector containing the sums of the sets of
|
||||
/// absolute differences between both operands.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(mpsadbw, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_u8x16();
|
||||
let b = b.as_u8x16();
|
||||
|
|
@ -874,9 +1043,12 @@ pub unsafe fn _mm_mpsadbw_epu8(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||
///
|
||||
/// * `1` - if the specified bits are all zeros,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestz(a.as_i64x2(), mask.as_i64x2())
|
||||
}
|
||||
|
|
@ -894,9 +1066,12 @@ pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
///
|
||||
/// * `1` - if the specified bits are all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestc(a.as_i64x2(), mask.as_i64x2())
|
||||
}
|
||||
|
|
@ -914,9 +1089,12 @@ pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
///
|
||||
/// * `1` - if the specified bits are neither all zeros nor all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
|
||||
ptestnzc(a.as_i64x2(), mask.as_i64x2())
|
||||
}
|
||||
|
|
@ -934,9 +1112,12 @@ pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
|
|||
///
|
||||
/// * `1` - if the specified bits are all zeros,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
|
||||
_mm_testz_si128(a, mask)
|
||||
}
|
||||
|
|
@ -952,10 +1133,13 @@ pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
|
|||
///
|
||||
/// * `1` - if the bits specified in the operand are all set to 1,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pcmpeqd))]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
|
||||
_mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
|
||||
}
|
||||
|
|
@ -973,9 +1157,12 @@ pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
|
|||
///
|
||||
/// * `1` - if the specified bits are neither all zeros nor all ones,
|
||||
/// * `0` - otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(ptest))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
|
||||
_mm_testnzc_si128(a, mask)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,49 +10,68 @@ use coresimd::simd_llvm::*;
|
|||
use coresimd::x86::*;
|
||||
|
||||
/// String contains unsigned 8-bit characters *(Default)*
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
|
||||
/// String contains unsigned 16-bit characters
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001;
|
||||
/// String contains signed 8-bit characters
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010;
|
||||
/// String contains unsigned 16-bit characters
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011;
|
||||
|
||||
/// For each character in `a`, find if it is in `b` *(Default)*
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000;
|
||||
/// For each character in `a`, determine if
|
||||
/// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100;
|
||||
/// The strings defined by `a` and `b` are equal
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000;
|
||||
/// Search for the defined substring in the target
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100;
|
||||
|
||||
/// Do not negate results *(Default)*
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000;
|
||||
/// Negate results
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000;
|
||||
/// Do not negate results before the end of the string
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000;
|
||||
/// Negate results only before the end of the string
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000;
|
||||
|
||||
/// **Index only**: return the least significant bit *(Default)*
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000;
|
||||
/// **Index only**: return the most significant bit
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000;
|
||||
|
||||
/// **Mask only**: return the bit mask
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_BIT_MASK: i32 = 0b0000_0000;
|
||||
/// **Mask only**: return the byte mask
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
|
||||
|
||||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and return the generated mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -296,10 +315,13 @@ pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
|
|||
/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
|
||||
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
|
||||
/// [`_mm_cmpestri`]: fn._mm_cmpestri.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -314,10 +336,13 @@ pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and return `1` if any character in `b` was null.
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -332,10 +357,13 @@ pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and return `1` if the resulting mask was non-zero,
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -350,10 +378,13 @@ pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and returns `1` if any character in `a` was null,
|
||||
/// and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -367,10 +398,13 @@ pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
|
||||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and return bit `0` of the resulting bit mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -385,10 +419,13 @@ pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
/// Compare packed strings with implicit lengths in `a` and `b` using the
|
||||
/// control in `imm8`, and return `1` if `b` did not contain a null
|
||||
/// character and the resulting mask was zero, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
||||
let a = a.as_i8x16();
|
||||
let b = b.as_i8x16();
|
||||
|
|
@ -402,10 +439,13 @@ pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
|
|||
|
||||
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `imm8`, and return the generated mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestrm(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> __m128i {
|
||||
|
|
@ -506,10 +546,13 @@ pub unsafe fn _mm_cmpestrm(
|
|||
/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
|
||||
/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
|
||||
/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestri(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -526,10 +569,13 @@ pub unsafe fn _mm_cmpestri(
|
|||
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `imm8`, and return `1` if any character in
|
||||
/// `b` was null, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestrz(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -546,10 +592,13 @@ pub unsafe fn _mm_cmpestrz(
|
|||
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `imm8`, and return `1` if the resulting mask
|
||||
/// was non-zero, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestrc(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -566,10 +615,13 @@ pub unsafe fn _mm_cmpestrc(
|
|||
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `imm8`, and return `1` if any character in
|
||||
/// a was null, and `0` otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestrs(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -586,10 +638,13 @@ pub unsafe fn _mm_cmpestrs(
|
|||
/// Compare packed strings in `a` and `b` with lengths `la` and `lb`
|
||||
/// using the control in `imm8`, and return bit `0` of the resulting
|
||||
/// bit mask.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestro(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -607,10 +662,13 @@ pub unsafe fn _mm_cmpestro(
|
|||
/// using the control in `imm8`, and return `1` if `b` did not
|
||||
/// contain a null character and the resulting mask was zero, and `0`
|
||||
/// otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
|
||||
#[rustc_args_required_const(4)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpestra(
|
||||
a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32
|
||||
) -> i32 {
|
||||
|
|
@ -626,36 +684,48 @@ pub unsafe fn _mm_cmpestra(
|
|||
|
||||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32 value for unsigned 8-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
|
||||
crc32_32_8(crc, v)
|
||||
}
|
||||
|
||||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32 value for unsigned 16-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
|
||||
crc32_32_16(crc, v)
|
||||
}
|
||||
|
||||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32 value for unsigned 32-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
|
||||
crc32_32_32(crc, v)
|
||||
}
|
||||
|
||||
/// Compare packed 64-bit integers in `a` and `b` for greater-than,
|
||||
/// return the results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(pcmpgtq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ extern "C" {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(extrq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
|
||||
mem::transmute(extrq(x.as_i64x2(), y.as_i8x16()))
|
||||
}
|
||||
|
|
@ -52,6 +53,7 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(insertq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
|
||||
mem::transmute(insertq(x.as_i64x2(), y.as_i64x2()))
|
||||
}
|
||||
|
|
@ -60,6 +62,7 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(movntsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
|
||||
movntsd(p, a);
|
||||
}
|
||||
|
|
@ -68,6 +71,7 @@ pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
|
|||
#[inline]
|
||||
#[target_feature(enable = "sse4a")]
|
||||
#[cfg_attr(test, assert_instr(movntss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
|
||||
movntss(p, a);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,9 +10,12 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Compute the absolute value of packed 8-bit signed integers in `a` and
|
||||
/// return the unsigned results.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
||||
mem::transmute(pabsb128(a.as_i8x16()))
|
||||
}
|
||||
|
|
@ -20,9 +23,12 @@ pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
|
|||
/// Compute the absolute value of each of the packed 16-bit signed integers in
|
||||
/// `a` and
|
||||
/// return the 16-bit unsigned integer
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
||||
mem::transmute(pabsw128(a.as_i16x8()))
|
||||
}
|
||||
|
|
@ -30,9 +36,12 @@ pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
|
|||
/// Compute the absolute value of each of the packed 32-bit signed integers in
|
||||
/// `a` and
|
||||
/// return the 32-bit unsigned integer
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pabsd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
||||
mem::transmute(pabsd128(a.as_i32x4()))
|
||||
}
|
||||
|
|
@ -61,19 +70,25 @@ pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
|
|||
/// r
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pshufb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
|
||||
}
|
||||
|
||||
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
|
||||
/// shift the result right by `n` bytes, and return the low 16 bytes.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(palignr, n = 15))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
|
||||
let n = n as u32;
|
||||
// If palignr is shifting the pair of vectors more than the size of two
|
||||
|
|
@ -141,9 +156,12 @@ pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
|
|||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
|
@ -151,27 +169,36 @@ pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
|
||||
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
||||
/// Horizontally add the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [4 x i32].
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phaddd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
||||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
|
@ -180,18 +207,24 @@ pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
|
||||
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
|
||||
/// saturated to 8000h.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
||||
/// Horizontally subtract the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [4 x i32].
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(phsubd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
@ -201,9 +234,12 @@ pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// integer values contained in the second source operand, add pairs of
|
||||
/// contiguous products with signed saturation, and writes the 16-bit sums to
|
||||
/// the corresponding bits in the destination.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pmaddubsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
|
||||
}
|
||||
|
|
@ -211,9 +247,12 @@ pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// Multiply packed 16-bit signed integer values, truncate the 32-bit
|
||||
/// product to the 18 most significant bits by right-shifting, round the
|
||||
/// truncated value by adding 1, and write bits [16:1] to the destination.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(pmulhrsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
|
@ -222,9 +261,12 @@ pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// integer in `b` is negative, and return the result.
|
||||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignb))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
|
||||
}
|
||||
|
|
@ -233,9 +275,12 @@ pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// integer in `b` is negative, and return the results.
|
||||
/// Elements in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
|
||||
}
|
||||
|
|
@ -244,9 +289,12 @@ pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
|
|||
/// integer in `b` is negative, and return the results.
|
||||
/// Element in result are zeroed out when the corresponding element in `b`
|
||||
/// is zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "ssse3")]
|
||||
#[cfg_attr(test, assert_instr(psignd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
|
||||
mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@ pub fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcfill_u32(x: u32) -> u32 {
|
||||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -81,6 +82,7 @@ pub unsafe fn _blcfill_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcfill))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcfill_u64(x: u64) -> u64 {
|
||||
x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -91,6 +93,7 @@ pub unsafe fn _blcfill_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blci_u32(x: u32) -> u32 {
|
||||
x | !(x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -102,6 +105,7 @@ pub unsafe fn _blci_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blci))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blci_u64(x: u64) -> u64 {
|
||||
x | !(x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -112,6 +116,7 @@ pub unsafe fn _blci_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcic_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -123,6 +128,7 @@ pub unsafe fn _blcic_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcic))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcic_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -134,6 +140,7 @@ pub unsafe fn _blcic_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -146,6 +153,7 @@ pub unsafe fn _blcmsk_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcmsk))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
|
||||
x ^ (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -156,6 +164,7 @@ pub unsafe fn _blcmsk_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcs_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -167,6 +176,7 @@ pub unsafe fn _blcs_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blcs))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blcs_u64(x: u64) -> u64 {
|
||||
x | x.wrapping_add(1)
|
||||
}
|
||||
|
|
@ -177,6 +187,7 @@ pub unsafe fn _blcs_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsfill_u32(x: u32) -> u32 {
|
||||
x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -188,6 +199,7 @@ pub unsafe fn _blsfill_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsfill))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsfill_u64(x: u64) -> u64 {
|
||||
x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -198,6 +210,7 @@ pub unsafe fn _blsfill_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsic_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -209,6 +222,7 @@ pub unsafe fn _blsic_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(blsic))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsic_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -220,6 +234,7 @@ pub unsafe fn _blsic_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
|
||||
!x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -232,6 +247,7 @@ pub unsafe fn _t1mskc_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(t1mskc))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
|
||||
!x | (x.wrapping_add(1))
|
||||
}
|
||||
|
|
@ -243,6 +259,7 @@ pub unsafe fn _t1mskc_u64(x: u64) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -255,6 +272,7 @@ pub unsafe fn _tzmsk_u32(x: u32) -> u32 {
|
|||
#[target_feature(enable = "tbm")]
|
||||
#[cfg_attr(test, assert_instr(tzmsk))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzmsk_u64(x: u64) -> u64 {
|
||||
!x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,9 +31,12 @@ extern "C" {
|
|||
///
|
||||
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
|
||||
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsave))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsave(
|
||||
mem_addr,
|
||||
|
|
@ -48,9 +51,12 @@ pub unsafe fn _xsave(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
|
||||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xrstor))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
|
||||
xrstor(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
|
||||
}
|
||||
|
|
@ -58,24 +64,31 @@ pub unsafe fn _xrstor(mem_addr: *const u8, rs_mask: u64) {
|
|||
/// `XFEATURE_ENABLED_MASK` for `XCR`
|
||||
///
|
||||
/// This intrinsic maps to `XSETBV` instruction.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub const _XCR_XFEATURE_ENABLED_MASK: u32 = 0;
|
||||
|
||||
/// Copy 64-bits from `val` to the extended control register (`XCR`) specified
|
||||
/// by `a`.
|
||||
///
|
||||
/// Currently only `XFEATURE_ENABLED_MASK` `XCR` is supported.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsetbv)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsetbv))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsetbv(a: u32, val: u64) {
|
||||
xsetbv(a, (val >> 32) as u32, val as u32);
|
||||
}
|
||||
|
||||
/// Reads the contents of the extended control register `XCR`
|
||||
/// specified in `xcr_no`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xgetbv)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xgetbv))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
|
||||
let eax: u32;
|
||||
let edx: u32;
|
||||
|
|
@ -90,9 +103,12 @@ pub unsafe fn _xgetbv(xcr_no: u32) -> u64 {
|
|||
/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize
|
||||
/// the manner in which data is saved. The performance of this instruction will
|
||||
/// be equal to or better than using the `XSAVE` instruction.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(test, assert_instr(xsaveopt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsaveopt(
|
||||
mem_addr,
|
||||
|
|
@ -107,9 +123,12 @@ pub unsafe fn _xsaveopt(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `xsavec` differs from `xsave` in that it uses compaction and that it may
|
||||
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(test, assert_instr(xsavec))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsavec(
|
||||
mem_addr,
|
||||
|
|
@ -125,9 +144,12 @@ pub unsafe fn _xsavec(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the
|
||||
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xsaves))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsaves(
|
||||
mem_addr,
|
||||
|
|
@ -145,9 +167,12 @@ pub unsafe fn _xsaves(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
|
||||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xrstors))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xrstors(mem_addr: *const u8, rs_mask: u64) {
|
||||
xrstors(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,17 +23,23 @@ use stdsimd_test::assert_instr;
|
|||
/// Counts the leading most significant zero bits.
|
||||
///
|
||||
/// When the operand is zero, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "lzcnt")]
|
||||
#[cfg_attr(test, assert_instr(lzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
|
||||
x.leading_zeros() as u64
|
||||
}
|
||||
|
||||
/// Counts the bits that are set.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "popcnt")]
|
||||
#[cfg_attr(test, assert_instr(popcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _popcnt64(x: i64) -> i32 {
|
||||
x.count_ones() as i32
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,10 +19,13 @@ use mem;
|
|||
|
||||
/// Copy `a` to result, and insert the 64-bit integer `i` into result
|
||||
/// at the location specified by `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64)
|
||||
#[inline]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[target_feature(enable = "avx")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_insert_epi64(a: __m256i, i: i64, index: i32) -> __m256i {
|
||||
mem::transmute(simd_insert(a.as_i64x4(), (index as u32) & 3, i))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,10 +22,13 @@ use coresimd::simd_llvm::*;
|
|||
use coresimd::x86::*;
|
||||
|
||||
/// Extract a 64-bit integer from `a`, selected with `imm8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx2")]
|
||||
#[rustc_args_required_const(1)]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_extract_epi64(a: __m256i, imm8: i32) -> i64 {
|
||||
let imm8 = (imm8 & 3) as u32;
|
||||
simd_extract(a.as_i64x4(), imm8)
|
||||
|
|
|
|||
|
|
@ -14,10 +14,13 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Extracts bits in range [`start`, `start` + `length`) from `a` into
|
||||
/// the least significant bits of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
|
||||
_bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64)
|
||||
}
|
||||
|
|
@ -27,36 +30,48 @@ pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
|
|||
///
|
||||
/// Bits [7,0] of `control` specify the index to the first bit in the range to
|
||||
/// be extracted, and bits [15,8] specify the length of the range.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(bextr))]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
|
||||
x86_bmi_bextr_64(a, control)
|
||||
}
|
||||
|
||||
/// Bitwise logical `AND` of inverted `a` with `b`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(andn))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
|
||||
!a & b
|
||||
}
|
||||
|
||||
/// Extract lowest set isolated bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsi))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsi_u64(x: u64) -> u64 {
|
||||
x & x.wrapping_neg()
|
||||
}
|
||||
|
||||
/// Get mask up to lowest set bit.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsmsk))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
|
||||
x ^ (x.wrapping_sub(1_u64))
|
||||
}
|
||||
|
|
@ -64,10 +79,13 @@ pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
|
|||
/// Resets the lowest set bit of `x`.
|
||||
///
|
||||
/// If `x` is sets CF.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(blsr))]
|
||||
#[cfg(not(target_arch = "x86"))] // generates lots of instructions
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _blsr_u64(x: u64) -> u64 {
|
||||
x & (x.wrapping_sub(1))
|
||||
}
|
||||
|
|
@ -75,9 +93,12 @@ pub unsafe fn _blsr_u64(x: u64) -> u64 {
|
|||
/// Counts the number of trailing least significant zero bits.
|
||||
///
|
||||
/// When the source operand is 0, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
|
||||
x.trailing_zeros() as u64
|
||||
}
|
||||
|
|
@ -85,9 +106,12 @@ pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
|
|||
/// Counts the number of trailing least significant zero bits.
|
||||
///
|
||||
/// When the source operand is 0, it returns its size in bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi1")]
|
||||
#[cfg_attr(test, assert_instr(tzcnt))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
|
||||
x.trailing_zeros() as i64
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,10 +17,13 @@ use stdsimd_test::assert_instr;
|
|||
///
|
||||
/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
|
||||
/// the low half and the high half of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(mulx))]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg(not(target_arch = "x86"))] // calls an intrinsic
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
|
||||
let result: u128 = (a as u128) * (b as u128);
|
||||
*hi = (result >> 64) as u64;
|
||||
|
|
@ -28,30 +31,39 @@ pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
|
|||
}
|
||||
|
||||
/// Zero higher bits of `a` >= `index`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(bzhi))]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
|
||||
x86_bmi2_bzhi_64(a, index as u64)
|
||||
}
|
||||
|
||||
/// Scatter contiguous low order bits of `a` to the result at the positions
|
||||
/// specified by the `mask`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pdep))]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
|
||||
x86_bmi2_pdep_64(a, mask)
|
||||
}
|
||||
|
||||
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
|
||||
/// order bit positions of the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "bmi2")]
|
||||
#[cfg_attr(test, assert_instr(pext))]
|
||||
#[cfg(not(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
|
||||
x86_bmi2_pext_64(a, mask)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,8 +6,11 @@
|
|||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Return an integer with the reversed byte order of x
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64)
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(bswap))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _bswap64(x: i64) -> i64 {
|
||||
bswap_i64(x)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,9 +21,12 @@ extern "C" {
|
|||
///
|
||||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxsave64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _fxsave64(mem_addr: *mut u8) {
|
||||
fxsave64(mem_addr)
|
||||
}
|
||||
|
|
@ -42,9 +45,12 @@ pub unsafe fn _fxsave64(mem_addr: *mut u8) {
|
|||
///
|
||||
/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
|
||||
/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "fxsr")]
|
||||
#[cfg_attr(test, assert_instr(fxrstor64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _fxrstor64(mem_addr: *const u8) {
|
||||
fxrstor64(mem_addr)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,10 +12,13 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Read a hardware generated 64-bit random value and store the result in val.
|
||||
/// Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdrand")]
|
||||
#[cfg_attr(test, assert_instr(rdrand))]
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
|
||||
let (v, flag) = x86_rdrand64_step();
|
||||
*val = v;
|
||||
|
|
@ -24,9 +27,12 @@ pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
|
|||
|
||||
/// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store
|
||||
/// in val. Return 1 if a random value was generated, and 0 otherwise.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step)
|
||||
#[inline]
|
||||
#[target_feature(enable = "rdseed")]
|
||||
#[cfg_attr(test, assert_instr(rdseed))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 {
|
||||
let (v, flag) = x86_rdseed64_step();
|
||||
*val = v;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,12 @@ extern "C" {
|
|||
/// [`_mm_setcsr`](fn._mm_setcsr.html)).
|
||||
///
|
||||
/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtss2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
|
||||
cvtss2si64(a)
|
||||
}
|
||||
|
|
@ -40,9 +43,12 @@ pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
|
|||
/// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
|
||||
///
|
||||
/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvttss2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
|
||||
cvttss2si64(a)
|
||||
}
|
||||
|
|
@ -52,9 +58,12 @@ pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
|
|||
///
|
||||
/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
|
||||
/// input).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2ss))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 {
|
||||
cvtsi642ss(a, b)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,34 +17,46 @@ extern "C" {
|
|||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in a to
|
||||
/// a 64-bit integer.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsd2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 {
|
||||
cvtsd2si64(a)
|
||||
}
|
||||
|
||||
/// Alias for `_mm_cvtsd_si64`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsd2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 {
|
||||
_mm_cvtsd_si64(a)
|
||||
}
|
||||
|
||||
/// Convert the lower double-precision (64-bit) floating-point element in `a`
|
||||
/// to a 64-bit integer with truncation.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvttsd2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 {
|
||||
cvttsd2si64(a)
|
||||
}
|
||||
|
||||
/// Alias for `_mm_cvttsd_si64`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvttsd2si))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
|
||||
_mm_cvttsd_si64(a)
|
||||
}
|
||||
|
|
@ -52,61 +64,82 @@ pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
|
|||
/// Stores a 64-bit integer value in the specified memory location.
|
||||
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
|
||||
/// used again soon).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(movnti))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
|
||||
intrinsics::nontemporal_store(mem_addr, a);
|
||||
}
|
||||
|
||||
/// Return a vector whose lowest element is `a` and all higher elements are
|
||||
/// `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i {
|
||||
_mm_set_epi64x(0, a)
|
||||
}
|
||||
|
||||
/// Return a vector whose lowest element is `a` and all higher elements are
|
||||
/// `0`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
|
||||
_mm_cvtsi64_si128(a)
|
||||
}
|
||||
|
||||
/// Return the lowest element of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
|
||||
simd_extract(a.as_i64x2(), 0)
|
||||
}
|
||||
|
||||
/// Return the lowest element of `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
|
||||
_mm_cvtsi128_si64(a)
|
||||
}
|
||||
|
||||
/// Return `a` with its lower element replaced by `b` after converting it to
|
||||
/// an `f64`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2sd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
|
||||
simd_insert(a, 0, b as f64)
|
||||
}
|
||||
|
||||
/// Return `a` with its lower element replaced by `b` after converting it to
|
||||
/// an `f64`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[cfg_attr(test, assert_instr(cvtsi2sd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
|
||||
_mm_cvtsi64_sd(a, b)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,11 +8,14 @@ use mem;
|
|||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Extract an 64-bit integer from `a` selected with `imm8`
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
// TODO: Add test for Windows
|
||||
#[cfg_attr(test, assert_instr(pextrq, imm8 = 1))]
|
||||
#[rustc_args_required_const(1)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
|
||||
let imm8 = (imm8 & 1) as u32;
|
||||
simd_extract(a.as_i64x2(), imm8)
|
||||
|
|
@ -20,10 +23,13 @@ pub unsafe fn _mm_extract_epi64(a: __m128i, imm8: i32) -> i64 {
|
|||
|
||||
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a
|
||||
/// location specified by `imm8`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.1")]
|
||||
#[cfg_attr(test, assert_instr(pinsrq, imm8 = 0))]
|
||||
#[rustc_args_required_const(2)]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_insert_epi64(a: __m128i, i: i64, imm8: i32) -> __m128i {
|
||||
mem::transmute(simd_insert(a.as_i64x2(), (imm8 & 1) as u32, i))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,9 +11,12 @@ extern "C" {
|
|||
|
||||
/// Starting with the initial value in `crc`, return the accumulated
|
||||
/// CRC32 value for unsigned 64-bit integer `v`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "sse4.2")]
|
||||
#[cfg_attr(test, assert_instr(crc32))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
|
||||
crc32_64_64(crc, v)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,9 +29,12 @@ extern "C" {
|
|||
///
|
||||
/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
|
||||
/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xsave64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsave64(
|
||||
mem_addr,
|
||||
|
|
@ -46,9 +49,12 @@ pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
|
||||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave")]
|
||||
#[cfg_attr(test, assert_instr(xrstor64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
|
||||
xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
|
||||
}
|
||||
|
|
@ -60,9 +66,12 @@ pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
|
|||
/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize
|
||||
/// the manner in which data is saved. The performance of this instruction will
|
||||
/// be equal to or better than using the `XSAVE64` instruction.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaveopt")]
|
||||
#[cfg_attr(test, assert_instr(xsaveopt64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsaveopt64(
|
||||
mem_addr,
|
||||
|
|
@ -77,9 +86,12 @@ pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// `xsavec` differs from `xsave` in that it uses compaction and that it may
|
||||
/// use init optimization. State is saved based on bits [62:0] in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsavec")]
|
||||
#[cfg_attr(test, assert_instr(xsavec64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsavec64(
|
||||
mem_addr,
|
||||
|
|
@ -95,9 +107,12 @@ pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the
|
||||
/// modified optimization. State is saved based on bits [62:0] in `save_mask`
|
||||
/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xsaves64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
|
||||
xsaves64(
|
||||
mem_addr,
|
||||
|
|
@ -115,9 +130,12 @@ pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
|
|||
/// State is restored based on bits [62:0] in `rs_mask`, `XCR0`, and
|
||||
/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
|
||||
/// boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "xsave,xsaves")]
|
||||
#[cfg_attr(test, assert_instr(xrstors64))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
|
||||
xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
//! [stdsimd]: https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/
|
||||
|
||||
#![feature(const_fn, integer_atomics, staged_api, stdsimd)]
|
||||
#![feature(cfg_target_feature, doc_cfg)]
|
||||
#![feature(cfg_target_feature, doc_cfg, allow_internal_unstable)]
|
||||
#![cfg_attr(feature = "cargo-clippy", allow(shadow_reuse))]
|
||||
#![cfg_attr(target_os = "linux", feature(linkage))]
|
||||
#![no_std]
|
||||
|
|
|
|||
|
|
@ -16,8 +16,70 @@
|
|||
//! in a global `AtomicUsize` variable. The query is performed by just checking
|
||||
//! whether the feature bit in this global variable is set or cleared.
|
||||
|
||||
/// A macro to test at *runtime* whether a CPU feature is available on
|
||||
/// x86/x86-64 platforms.
|
||||
///
|
||||
/// This macro is provided in the standard library and will detect at runtime
|
||||
/// whether the specified CPU feature is detected. This does *not* resolve at
|
||||
/// compile time unless the specified feature is already enabled for the entire
|
||||
/// crate. Runtime detection currently relies mostly on the `cpuid` instruction.
|
||||
///
|
||||
/// This macro only takes one argument which is a string literal of the feature
|
||||
/// being tested for. The feature names supported are the lowercase versions of
|
||||
/// the ones defined by Intel in [their documentation][docs].
|
||||
///
|
||||
/// ## Supported arguments
|
||||
///
|
||||
/// This macro supports the same names that `#[target_feature]` supports. Unlike
|
||||
/// `#[target_feature]`, however, this macro does not support names separated
|
||||
/// with a comma. Instead testing for multiple features must be done through
|
||||
/// separate macro invocations for now.
|
||||
///
|
||||
/// Supported arguments are:
|
||||
///
|
||||
/// * `"aes"`
|
||||
/// * `"pclmulqdq"`
|
||||
/// * `"rdrand"`
|
||||
/// * `"rdseed"`
|
||||
/// * `"tsc"`
|
||||
/// * `"mmx"`
|
||||
/// * `"sse"`
|
||||
/// * `"sse2"`
|
||||
/// * `"sse3"`
|
||||
/// * `"ssse3"`
|
||||
/// * `"sse4.1"`
|
||||
/// * `"sse4.2"`
|
||||
/// * `"sse4a"`
|
||||
/// * `"sha"`
|
||||
/// * `"avx"`
|
||||
/// * `"avx2"`
|
||||
/// * `"avx512f"`
|
||||
/// * `"avx512cd"`
|
||||
/// * `"avx512er"`
|
||||
/// * `"avx512pf"`
|
||||
/// * `"avx512bw"`
|
||||
/// * `"avx512dq"`
|
||||
/// * `"avx512vl"`
|
||||
/// * `"avx512ifma"`
|
||||
/// * `"avx512vbmi"`
|
||||
/// * `"avx512vpopcntdq"`
|
||||
/// * `"fma"`
|
||||
/// * `"bmi1"`
|
||||
/// * `"bmi2"`
|
||||
/// * `"abm"`
|
||||
/// * `"lzcnt"`
|
||||
/// * `"tbm"`
|
||||
/// * `"popcnt"`
|
||||
/// * `"fxsr"`
|
||||
/// * `"xsave"`
|
||||
/// * `"xsaveopt"`
|
||||
/// * `"xsaves"`
|
||||
/// * `"xsavec"`
|
||||
///
|
||||
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
|
||||
#[macro_export]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
#[allow_internal_unstable]
|
||||
macro_rules! is_x86_feature_detected {
|
||||
("aes") => {
|
||||
cfg!(target_feature = "aes") || $crate::arch::detect::check_for(
|
||||
|
|
|
|||
|
|
@ -343,30 +343,38 @@
|
|||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
#[stable(feature = "simd_arch", since = "1.27.0")]
|
||||
pub mod arch {
|
||||
#[cfg(all(not(dox), target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use coresimd::arch::x86;
|
||||
|
||||
#[cfg(all(not(dox), target_arch = "x86_64"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub use coresimd::arch::x86_64;
|
||||
|
||||
#[cfg(all(not(dox), target_arch = "arm"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub use coresimd::arch::arm;
|
||||
|
||||
#[cfg(all(not(dox), target_arch = "aarch64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub use coresimd::arch::aarch64;
|
||||
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub use coresimd::arch::wasm32;
|
||||
|
||||
#[cfg(all(not(dox), target_arch = "mips"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub use coresimd::arch::mips;
|
||||
|
||||
#[cfg(all(not(dox), target_arch = "mips64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub use coresimd::arch::mips64;
|
||||
|
||||
#[doc(hidden)] // unstable implementation detail
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod detect;
|
||||
|
||||
/// Platform-specific intrinsics for the `x86` platform.
|
||||
|
|
@ -378,6 +386,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/x86/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "x86"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86 {}
|
||||
|
||||
/// Platform-specific intrinsics for the `x86_64` platform.
|
||||
|
|
@ -389,6 +398,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/x86_64/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "x86_64"))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub mod x86_64 {}
|
||||
|
||||
/// Platform-specific intrinsics for the `arm` platform.
|
||||
|
|
@ -400,6 +410,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/arm/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "arm"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod arm {}
|
||||
|
||||
/// Platform-specific intrinsics for the `aarch64` platform.
|
||||
|
|
@ -411,6 +422,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/aarch64/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "aarch64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod aarch64 {}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips` platform.
|
||||
|
|
@ -422,6 +434,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/mips/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "mips"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod mips {}
|
||||
|
||||
/// Platform-specific intrinsics for the `mips64` platform.
|
||||
|
|
@ -433,6 +446,7 @@ pub mod arch {
|
|||
/// [libcore]: ../../../core/arch/mips64/index.html
|
||||
#[cfg(dox)]
|
||||
#[doc(cfg(target_arch = "mips64"))]
|
||||
#[unstable(feature = "stdsimd", issue = "0")]
|
||||
pub mod mips64 {}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue