Auto merge of #149118 - folkertdev:stdarch-sync-2025-11-19, r=tgross35
stdarch subtree update
Subtree update of `stdarch` to 50134e10cb.
Created using https://github.com/rust-lang/josh-sync.
The only interesting commit is the final one, which enables the `avx10_target_feature` feature in the standard library, because it is now used in `stdarch`.
r? `@sayantn` (or whoever, this is just a straightforward sync)
This commit is contained in:
commit
2c0f4860cc
29 changed files with 1031 additions and 692 deletions
|
|
@ -195,6 +195,7 @@
|
|||
// tidy-alphabetical-start
|
||||
#![feature(aarch64_unstable_target_feature)]
|
||||
#![feature(arm_target_feature)]
|
||||
#![feature(avx10_target_feature)]
|
||||
#![feature(hexagon_target_feature)]
|
||||
#![feature(loongarch_target_feature)]
|
||||
#![feature(mips_target_feature)]
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ jobs:
|
|||
# https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
|
||||
zulip-stream-id: 208962
|
||||
zulip-bot-email: "stdarch-ci-bot@rust-lang.zulipchat.com"
|
||||
pr-base-branch: master
|
||||
pr-base-branch: main
|
||||
branch-name: rustc-pull
|
||||
secrets:
|
||||
zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
|
||||
|
|
|
|||
|
|
@ -15,5 +15,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
|
||||
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
|
||||
OBJDUMP=aarch64-linux-gnu-objdump \
|
||||
STDARCH_TEST_SKIP_FEATURE=tme
|
||||
OBJDUMP=aarch64-linux-gnu-objdump
|
||||
|
|
|
|||
|
|
@ -27,4 +27,3 @@ ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
|
|||
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
|
||||
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
|
||||
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
|
||||
ENV STDARCH_TEST_SKIP_FEATURE=tme
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ run() {
|
|||
--env NORUN \
|
||||
--env RUSTFLAGS \
|
||||
--env CARGO_UNSTABLE_BUILD_STD \
|
||||
--env TEST_SAMPLE_INTRINSICS_PERCENTAGE \
|
||||
--volume "${HOME}/.cargo":/cargo \
|
||||
--volume "$(rustc --print sysroot)":/rust:ro \
|
||||
--volume "$(pwd)":/checkout:ro \
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ case ${TARGET} in
|
|||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
|
|
@ -58,6 +59,7 @@ case ${TARGET} in
|
|||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
armv7-unknown-linux-gnueabihf*)
|
||||
|
|
@ -65,6 +67,7 @@ case ${TARGET} in
|
|||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
|
||||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
|
||||
;;
|
||||
|
||||
x86_64-unknown-linux-gnu*)
|
||||
|
|
@ -72,7 +75,7 @@ case ${TARGET} in
|
|||
TEST_CXX_COMPILER="clang++"
|
||||
TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
|
||||
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
|
||||
TEST_SAMPLE_INTRINSICS_PERCENTAGE=5
|
||||
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=5}"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
|
|
@ -82,23 +85,25 @@ esac
|
|||
# Arm specific
|
||||
case "${TARGET}" in
|
||||
aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \
|
||||
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}"
|
||||
--target "${TARGET}" \
|
||||
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
|
||||
;;
|
||||
|
||||
aarch64_be-unknown-linux-gnu*)
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \
|
||||
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
|
||||
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
--cppcompiler "${TEST_CXX_COMPILER}" \
|
||||
--skip "${TEST_SKIP_INTRINSICS}" \
|
||||
--target "${TARGET}" \
|
||||
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" \
|
||||
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
|
||||
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
|
||||
;;
|
||||
|
|
@ -109,7 +114,7 @@ case "${TARGET}" in
|
|||
# Hence the use of `env -u`.
|
||||
env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \
|
||||
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \
|
||||
RUST_LOG=warn RUST_BACKTRACE=1 \
|
||||
RUST_LOG=info RUST_BACKTRACE=1 \
|
||||
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
|
||||
--bin intrinsic-test -- intrinsics_data/x86-intel.xml \
|
||||
--runner "${TEST_RUNNER}" \
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
|
||||
|
||||
# Usage
|
||||
# Usage
|
||||
|
||||
`core::arch` is available as part of `libcore` and it is re-exported by
|
||||
`libstd`. Prefer using it via `core::arch` or `std::arch` than via this crate.
|
||||
|
|
@ -17,7 +17,7 @@ are:
|
|||
you need to re-compile it for a non-standard target, please prefer using
|
||||
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
|
||||
this crate.
|
||||
|
||||
|
||||
* using some features that might not be available even behind unstable Rust
|
||||
features. We try to keep these to a minimum. If you need to use some of these
|
||||
features, please open an issue so that we can expose them in nightly Rust and
|
||||
|
|
@ -34,7 +34,7 @@ are:
|
|||
* [How to get started][contrib]
|
||||
* [How to help implement intrinsics][help-implement]
|
||||
|
||||
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
|
||||
[contrib]: https://github.com/rust-lang/stdarch/blob/HEAD/CONTRIBUTING.md
|
||||
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
|
||||
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
|
||||
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/
|
||||
|
|
|
|||
|
|
@ -21,10 +21,6 @@ mod neon;
|
|||
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
|
||||
pub use self::neon::*;
|
||||
|
||||
mod tme;
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub use self::tme::*;
|
||||
|
||||
mod prefetch;
|
||||
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
|
||||
pub use self::prefetch::*;
|
||||
|
|
|
|||
|
|
@ -1,201 +0,0 @@
|
|||
//! ARM's Transactional Memory Extensions (TME).
|
||||
//!
|
||||
//! This CPU feature is available on Aarch64 - A architecture profile.
|
||||
//! This feature is in the non-neon feature set. TME specific vendor documentation can
|
||||
//! be found [TME Intrinsics Introduction][tme_intrinsics_intro].
|
||||
//!
|
||||
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
|
||||
//!
|
||||
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
|
||||
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
|
||||
//!
|
||||
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
|
||||
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
|
||||
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
|
||||
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
unsafe extern "unadjusted" {
|
||||
#[link_name = "llvm.aarch64.tstart"]
|
||||
fn aarch64_tstart() -> u64;
|
||||
#[link_name = "llvm.aarch64.tcommit"]
|
||||
fn aarch64_tcommit();
|
||||
#[link_name = "llvm.aarch64.tcancel"]
|
||||
fn aarch64_tcancel(imm0: u64);
|
||||
#[link_name = "llvm.aarch64.ttest"]
|
||||
fn aarch64_ttest() -> u64;
|
||||
}
|
||||
|
||||
/// Transaction successfully started.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
|
||||
|
||||
/// Extraction mask for failure reason
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
|
||||
|
||||
/// Transaction retry is possible.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_RTRY: u64 = 1 << 15;
|
||||
|
||||
/// Transaction executed a TCANCEL instruction
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_CNCL: u64 = 1 << 16;
|
||||
|
||||
/// Transaction aborted because a conflict occurred
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_MEM: u64 = 1 << 17;
|
||||
|
||||
/// Fallback error type for any other reason
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_IMP: u64 = 1 << 18;
|
||||
|
||||
/// Transaction aborted because a non-permissible operation was attempted
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_ERR: u64 = 1 << 19;
|
||||
|
||||
/// Transaction aborted due to read or write set limit was exceeded
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_SIZE: u64 = 1 << 20;
|
||||
|
||||
/// Transaction aborted due to transactional nesting level was exceeded
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_NEST: u64 = 1 << 21;
|
||||
|
||||
/// Transaction aborted due to a debug trap.
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_DBG: u64 = 1 << 22;
|
||||
|
||||
/// Transaction failed from interrupt
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_INT: u64 = 1 << 23;
|
||||
|
||||
/// Indicates a TRIVIAL version of TM is available
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
|
||||
|
||||
// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
|
||||
// understand these instructions.
|
||||
|
||||
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
|
||||
/// If the transaction fails, all state modifications are discarded and a cause of the failure
|
||||
/// is encoded in the return value.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tstart() -> u64 {
|
||||
aarch64_tstart()
|
||||
}
|
||||
|
||||
/// Commits the current transaction. For a nested transaction, the only effect is that the
|
||||
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
|
||||
/// performed transactionally are committed to the architectural state.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tcommit() {
|
||||
aarch64_tcommit()
|
||||
}
|
||||
|
||||
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(
|
||||
all(test, not(target_env = "msvc")),
|
||||
assert_instr(tcancel, IMM16 = 0x0)
|
||||
)]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __tcancel<const IMM16: u64>() {
|
||||
static_assert!(IMM16 <= 65535);
|
||||
aarch64_tcancel(IMM16);
|
||||
}
|
||||
|
||||
/// Tests if executing inside a transaction. If no transaction is currently executing,
|
||||
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
|
||||
///
|
||||
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
|
||||
#[inline]
|
||||
#[target_feature(enable = "tme")]
|
||||
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
|
||||
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
|
||||
pub unsafe fn __ttest() -> u64 {
|
||||
aarch64_ttest()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
const CANCEL_CODE: u64 = (0 | (0x123 & _TMFAILURE_REASON) as u64) as u64;
|
||||
|
||||
#[simd_test(enable = "tme")]
|
||||
unsafe fn test_tstart() {
|
||||
let mut x = 0;
|
||||
for i in 0..10 {
|
||||
let code = tme::__tstart();
|
||||
if code == _TMSTART_SUCCESS {
|
||||
x += 1;
|
||||
assert_eq!(x, i + 1);
|
||||
break;
|
||||
}
|
||||
assert_eq!(x, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tme")]
|
||||
unsafe fn test_tcommit() {
|
||||
let mut x = 0;
|
||||
for i in 0..10 {
|
||||
let code = tme::__tstart();
|
||||
if code == _TMSTART_SUCCESS {
|
||||
x += 1;
|
||||
assert_eq!(x, i + 1);
|
||||
tme::__tcommit();
|
||||
}
|
||||
assert_eq!(x, i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tme")]
|
||||
unsafe fn test_tcancel() {
|
||||
let mut x = 0;
|
||||
|
||||
for i in 0..10 {
|
||||
let code = tme::__tstart();
|
||||
if code == _TMSTART_SUCCESS {
|
||||
x += 1;
|
||||
assert_eq!(x, i + 1);
|
||||
tme::__tcancel::<CANCEL_CODE>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(x, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "tme")]
|
||||
unsafe fn test_ttest() {
|
||||
for _ in 0..10 {
|
||||
let code = tme::__tstart();
|
||||
if code == _TMSTART_SUCCESS {
|
||||
if tme::__ttest() == 2 {
|
||||
tme::__tcancel::<CANCEL_CODE>();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -33,7 +33,8 @@
|
|||
f16,
|
||||
aarch64_unstable_target_feature,
|
||||
bigint_helper_methods,
|
||||
funnel_shifts
|
||||
funnel_shifts,
|
||||
avx10_target_feature
|
||||
)]
|
||||
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
|
||||
#![deny(clippy::missing_inline_in_public_items)]
|
||||
|
|
|
|||
|
|
@ -163,3 +163,17 @@ macro_rules! simd_extract {
|
|||
($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
|
||||
($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! simd_masked_load {
|
||||
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
|
||||
$crate::intrinsics::simd::simd_masked_load::<_, _, _, { $align }>($mask, $ptr, $default)
|
||||
};
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! simd_masked_store {
|
||||
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
|
||||
$crate::intrinsics::simd::simd_masked_store::<_, _, _, { $align }>($mask, $ptr, $default)
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1675,7 +1675,8 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
|
|||
#[cfg_attr(test, assert_instr(vmaskmovpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
|
||||
maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
|
||||
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
|
||||
}
|
||||
|
||||
/// Stores packed double-precision (64-bit) floating-point elements from `a`
|
||||
|
|
@ -1687,7 +1688,8 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d
|
|||
#[cfg_attr(test, assert_instr(vmaskmovpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
|
||||
maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
|
||||
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
|
||||
}
|
||||
|
||||
/// Loads packed double-precision (64-bit) floating-point elements from memory
|
||||
|
|
@ -1700,7 +1702,8 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d)
|
|||
#[cfg_attr(test, assert_instr(vmaskmovpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
|
||||
maskloadpd(mem_addr as *const i8, mask.as_i64x2())
|
||||
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
|
||||
}
|
||||
|
||||
/// Stores packed double-precision (64-bit) floating-point elements from `a`
|
||||
|
|
@ -1712,7 +1715,8 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(vmaskmovpd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
|
||||
maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
|
||||
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
|
||||
}
|
||||
|
||||
/// Loads packed single-precision (32-bit) floating-point elements from memory
|
||||
|
|
@ -1725,7 +1729,8 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
|
|||
#[cfg_attr(test, assert_instr(vmaskmovps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
|
||||
maskloadps256(mem_addr as *const i8, mask.as_i32x8())
|
||||
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
|
||||
}
|
||||
|
||||
/// Stores packed single-precision (32-bit) floating-point elements from `a`
|
||||
|
|
@ -1737,7 +1742,8 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256
|
|||
#[cfg_attr(test, assert_instr(vmaskmovps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
|
||||
maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
|
||||
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
|
||||
}
|
||||
|
||||
/// Loads packed single-precision (32-bit) floating-point elements from memory
|
||||
|
|
@ -1750,7 +1756,8 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256)
|
|||
#[cfg_attr(test, assert_instr(vmaskmovps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
|
||||
maskloadps(mem_addr as *const i8, mask.as_i32x4())
|
||||
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
|
||||
}
|
||||
|
||||
/// Stores packed single-precision (32-bit) floating-point elements from `a`
|
||||
|
|
@ -1762,7 +1769,8 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(vmaskmovps))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
|
||||
maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
|
||||
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
|
||||
}
|
||||
|
||||
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
|
||||
|
|
@ -3147,22 +3155,6 @@ unsafe extern "C" {
|
|||
fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
|
||||
#[link_name = "llvm.x86.avx.vpermilvar.pd"]
|
||||
fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
|
||||
#[link_name = "llvm.x86.avx.maskload.pd.256"]
|
||||
fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
|
||||
#[link_name = "llvm.x86.avx.maskstore.pd.256"]
|
||||
fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
|
||||
#[link_name = "llvm.x86.avx.maskload.pd"]
|
||||
fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
|
||||
#[link_name = "llvm.x86.avx.maskstore.pd"]
|
||||
fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
|
||||
#[link_name = "llvm.x86.avx.maskload.ps.256"]
|
||||
fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
|
||||
#[link_name = "llvm.x86.avx.maskstore.ps.256"]
|
||||
fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
|
||||
#[link_name = "llvm.x86.avx.maskload.ps"]
|
||||
fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
|
||||
#[link_name = "llvm.x86.avx.maskstore.ps"]
|
||||
fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
|
||||
#[link_name = "llvm.x86.avx.ldu.dq.256"]
|
||||
fn vlddqu(mem_addr: *const i8) -> i8x32;
|
||||
#[link_name = "llvm.x86.avx.rcp.ps.256"]
|
||||
|
|
@ -3928,28 +3920,43 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_permute2f128_ps() {
|
||||
let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
|
||||
let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
|
||||
let r = _mm256_permute2f128_ps::<0x13>(a, b);
|
||||
let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
|
||||
let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.);
|
||||
let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.);
|
||||
let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b);
|
||||
let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.);
|
||||
assert_eq_m256(r, e);
|
||||
|
||||
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
|
||||
let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b);
|
||||
let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
|
||||
assert_eq_m256(r, z);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_permute2f128_pd() {
|
||||
let a = _mm256_setr_pd(1., 2., 3., 4.);
|
||||
let b = _mm256_setr_pd(5., 6., 7., 8.);
|
||||
let r = _mm256_permute2f128_pd::<0x31>(a, b);
|
||||
let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b);
|
||||
let e = _mm256_setr_pd(3., 4., 7., 8.);
|
||||
assert_eq_m256d(r, e);
|
||||
|
||||
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
|
||||
let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b);
|
||||
let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
unsafe fn test_mm256_permute2f128_si256() {
|
||||
let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
|
||||
let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
|
||||
let r = _mm256_permute2f128_si256::<0x20>(a, b);
|
||||
let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18);
|
||||
let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28);
|
||||
let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b);
|
||||
let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24);
|
||||
assert_eq_m256i(r, e);
|
||||
|
||||
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
|
||||
let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b);
|
||||
let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1773,7 +1773,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpmaddubsw))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
|
||||
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
|
||||
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
|
||||
}
|
||||
|
||||
/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
|
||||
|
|
@ -1786,7 +1786,8 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
|
||||
transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
|
||||
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
|
||||
}
|
||||
|
||||
/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
|
||||
|
|
@ -1799,7 +1800,8 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
|
||||
transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
|
||||
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
|
||||
}
|
||||
|
||||
/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
|
||||
|
|
@ -1812,7 +1814,8 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
|
||||
transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
|
||||
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
|
||||
}
|
||||
|
||||
/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
|
||||
|
|
@ -1825,7 +1828,8 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
|
||||
transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
|
||||
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
|
||||
}
|
||||
|
||||
/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
|
||||
|
|
@ -1838,7 +1842,8 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
|
||||
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
|
||||
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
|
||||
}
|
||||
|
||||
/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
|
||||
|
|
@ -1851,7 +1856,8 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
|
||||
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
|
||||
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
|
||||
}
|
||||
|
||||
/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
|
||||
|
|
@ -1864,7 +1870,8 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
|
||||
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
|
||||
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
|
||||
}
|
||||
|
||||
/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
|
||||
|
|
@ -1877,7 +1884,8 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
|
|||
#[cfg_attr(test, assert_instr(vpmaskmovq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
|
||||
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
|
||||
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
|
||||
}
|
||||
|
||||
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
|
||||
|
|
@ -2778,7 +2786,12 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x4::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` left by the amount
|
||||
|
|
@ -2791,7 +2804,12 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` left by the amount
|
||||
|
|
@ -2804,7 +2822,12 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x2::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` left by the amount
|
||||
|
|
@ -2817,7 +2840,12 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x4::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 16-bit integers in `a` right by `count` while
|
||||
|
|
@ -2881,7 +2909,12 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
|
||||
simd_shr(a.as_i32x4(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` right by the amount specified by the
|
||||
|
|
@ -2893,7 +2926,12 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
|
||||
simd_shr(a.as_i32x8(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
|
||||
|
|
@ -3076,7 +3114,12 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x4();
|
||||
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x4::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 32-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3088,7 +3131,12 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x8();
|
||||
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3100,7 +3148,12 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x2::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts packed 64-bit integers in `a` right by the amount specified by
|
||||
|
|
@ -3112,7 +3165,12 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
|
|||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(psrlvq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x4::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
|
||||
|
|
@ -3644,23 +3702,7 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx2.phsub.sw"]
|
||||
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
|
||||
fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.maskload.d"]
|
||||
fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.maskload.d.256"]
|
||||
fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.maskload.q"]
|
||||
fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx2.maskload.q.256"]
|
||||
fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.maskstore.d"]
|
||||
fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
|
||||
#[link_name = "llvm.x86.avx2.maskstore.d.256"]
|
||||
fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
|
||||
#[link_name = "llvm.x86.avx2.maskstore.q"]
|
||||
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
|
||||
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
|
||||
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
|
||||
fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.mpsadbw"]
|
||||
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
|
||||
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
|
||||
|
|
@ -3687,36 +3729,16 @@ unsafe extern "C" {
|
|||
fn pslld(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psll.q"]
|
||||
fn psllq(a: i64x4, count: i64x2) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psllv.d"]
|
||||
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psllv.d.256"]
|
||||
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psllv.q"]
|
||||
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx2.psllv.q.256"]
|
||||
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psra.w"]
|
||||
fn psraw(a: i16x16, count: i16x8) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.psra.d"]
|
||||
fn psrad(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrav.d"]
|
||||
fn psravd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psrav.d.256"]
|
||||
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrl.w"]
|
||||
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx2.psrl.d"]
|
||||
fn psrld(a: i32x8, count: i32x4) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrl.q"]
|
||||
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.d"]
|
||||
fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.d.256"]
|
||||
fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.q"]
|
||||
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
|
||||
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx2.pshuf.b"]
|
||||
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
|
||||
#[link_name = "llvm.x86.avx2.permd"]
|
||||
|
|
@ -5727,7 +5749,7 @@ mod tests {
|
|||
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx")]
|
||||
#[simd_test(enable = "avx2")]
|
||||
unsafe fn test_mm256_extract_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_setr_epi8(
|
||||
|
|
|
|||
|
|
@ -5609,7 +5609,8 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
|
||||
transmute(loaddqu16_512(mem_addr, src.as_i16x32(), k))
|
||||
let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 16-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5635,7 +5636,8 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
|
||||
transmute(loaddqu8_512(mem_addr, src.as_i8x64(), k))
|
||||
let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 8-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5661,7 +5663,8 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
|
||||
transmute(loaddqu16_256(mem_addr, src.as_i16x16(), k))
|
||||
let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 16-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5687,7 +5690,8 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
|
||||
transmute(loaddqu8_256(mem_addr, src.as_i8x32(), k))
|
||||
let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 8-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5713,7 +5717,8 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
|
||||
transmute(loaddqu16_128(mem_addr, src.as_i16x8(), k))
|
||||
let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 16-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5739,7 +5744,8 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
|
||||
transmute(loaddqu8_128(mem_addr, src.as_i8x16(), k))
|
||||
let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 8-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -5764,7 +5770,8 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
|
||||
storedqu16_512(mem_addr, a.as_i16x32(), mask)
|
||||
let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
|
||||
}
|
||||
|
||||
/// Store packed 8-bit integers from a into memory using writemask k.
|
||||
|
|
@ -5776,7 +5783,8 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
|
||||
storedqu8_512(mem_addr, a.as_i8x64(), mask)
|
||||
let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
|
||||
}
|
||||
|
||||
/// Store packed 16-bit integers from a into memory using writemask k.
|
||||
|
|
@ -5788,7 +5796,8 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
|
||||
storedqu16_256(mem_addr, a.as_i16x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
|
||||
}
|
||||
|
||||
/// Store packed 8-bit integers from a into memory using writemask k.
|
||||
|
|
@ -5800,7 +5809,8 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
|
||||
storedqu8_256(mem_addr, a.as_i8x32(), mask)
|
||||
let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
|
||||
}
|
||||
|
||||
/// Store packed 16-bit integers from a into memory using writemask k.
|
||||
|
|
@ -5812,7 +5822,8 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
|
|||
#[cfg_attr(test, assert_instr(vmovdqu16))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
|
||||
storedqu16_128(mem_addr, a.as_i16x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
|
||||
}
|
||||
|
||||
/// Store packed 8-bit integers from a into memory using writemask k.
|
||||
|
|
@ -5824,7 +5835,8 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
|
|||
#[cfg_attr(test, assert_instr(vmovdqu8))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
|
||||
storedqu8_128(mem_addr, a.as_i8x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
|
||||
}
|
||||
|
||||
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
|
||||
|
|
@ -5943,7 +5955,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpmaddubsw))]
|
||||
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpmaddubsw(a.as_i8x64(), b.as_i8x64())) }
|
||||
unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
|
||||
}
|
||||
|
||||
/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -6852,7 +6864,12 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x32::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -6891,7 +6908,12 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsllvw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x16::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -6930,7 +6952,12 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvw))]
|
||||
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsllvw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7188,7 +7215,12 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x32::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7227,7 +7259,12 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsrlvw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x16::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7266,7 +7303,12 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvw))]
|
||||
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsrlvw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, count, u16x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7511,7 +7553,12 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravw(a.as_i16x32(), count.as_i16x32())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x32();
|
||||
let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
|
||||
simd_shr(a.as_i16x32(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7550,7 +7597,12 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsravw256(a.as_i16x16(), count.as_i16x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x16();
|
||||
let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
|
||||
simd_shr(a.as_i16x16(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -7589,7 +7641,12 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravw))]
|
||||
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsravw128(a.as_i16x8(), count.as_i16x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u16x8();
|
||||
let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
|
||||
let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
|
||||
simd_shr(a.as_i16x8(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -11631,7 +11688,7 @@ unsafe extern "C" {
|
|||
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
|
||||
fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
|
||||
fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.packssdw.512"]
|
||||
fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
|
||||
|
|
@ -11645,33 +11702,12 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.psll.w.512"]
|
||||
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.512"]
|
||||
fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.256"]
|
||||
fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psllv.w.128"]
|
||||
fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrl.w.512"]
|
||||
fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.512"]
|
||||
fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.256"]
|
||||
fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.w.128"]
|
||||
fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psra.w.512"]
|
||||
fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.512"]
|
||||
fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.256"]
|
||||
fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.psrav.w.128"]
|
||||
fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
|
||||
fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
|
||||
#[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
|
||||
|
|
@ -11733,33 +11769,6 @@ unsafe extern "C" {
|
|||
fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
|
||||
fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
|
||||
fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
|
||||
fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
|
||||
fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
|
||||
fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
|
||||
fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
|
||||
fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
|
||||
fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
|
||||
fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
|
||||
fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
|
||||
fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
|
||||
fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
|
||||
fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
|
||||
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -13326,7 +13335,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_max_epu16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13343,7 +13352,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_max_epu16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13360,7 +13369,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_max_epu16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13371,7 +13380,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_max_epu16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13382,7 +13391,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_max_epu16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13393,7 +13402,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_max_epu16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13425,7 +13434,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_max_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13453,7 +13462,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_max_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13480,7 +13489,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_max_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13497,7 +13506,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_max_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13514,7 +13523,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_max_epu8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13525,7 +13534,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_max_epu8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13551,7 +13560,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_max_epi16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13568,7 +13577,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_max_epi16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13585,7 +13594,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_max_epi16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13596,7 +13605,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_max_epi16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13607,7 +13616,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_max_epi16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13618,7 +13627,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_max_epi16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13650,7 +13659,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_max_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13678,7 +13687,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_max_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13705,7 +13714,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_max_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13722,7 +13731,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_max_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13739,7 +13748,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_max_epi8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13750,7 +13759,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_max_epi8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13776,7 +13785,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_min_epu16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13793,7 +13802,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_min_epu16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13810,7 +13819,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_min_epu16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13821,7 +13830,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_min_epu16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13832,7 +13841,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_min_epu16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13843,7 +13852,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_min_epu16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13875,7 +13884,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_min_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13903,7 +13912,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_min_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13930,7 +13939,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_min_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13947,7 +13956,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_min_epu8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -13964,7 +13973,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_min_epu8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -13975,7 +13984,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_min_epu8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14001,7 +14010,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_min_epi16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14018,7 +14027,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_min_epi16() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14035,7 +14044,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_min_epi16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14046,7 +14055,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_min_epi16() {
|
||||
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14057,7 +14066,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_min_epi16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14068,7 +14077,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_min_epi16() {
|
||||
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14100,7 +14109,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_min_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14128,7 +14137,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_min_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14155,7 +14164,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_min_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14172,7 +14181,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_min_epi8() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
|
|
@ -14189,7 +14198,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_min_epi8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -14200,7 +14209,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_min_epi8() {
|
||||
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
|
||||
|
|
@ -16317,7 +16326,7 @@ mod tests {
|
|||
assert_eq_m128i(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_loadu_epi16() {
|
||||
let src = _mm512_set1_epi16(42);
|
||||
let a = &[
|
||||
|
|
@ -16335,7 +16344,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_loadu_epi16() {
|
||||
let a = &[
|
||||
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -16352,7 +16361,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 32];
|
||||
let a = &[
|
||||
|
|
@ -16370,7 +16379,7 @@ mod tests {
|
|||
assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_loadu_epi8() {
|
||||
let src = _mm512_set1_epi8(42);
|
||||
let a = &[
|
||||
|
|
@ -16390,7 +16399,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_maskz_loadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -16409,7 +16418,7 @@ mod tests {
|
|||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw")]
|
||||
#[simd_test(enable = "avx512bw")]
|
||||
unsafe fn test_mm512_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 64];
|
||||
let a = &[
|
||||
|
|
@ -16429,7 +16438,7 @@ mod tests {
|
|||
assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_loadu_epi16() {
|
||||
let src = _mm256_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
|
|
@ -16443,7 +16452,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_loadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
|
|
@ -16454,7 +16463,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 16];
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
|
|
@ -16468,7 +16477,7 @@ mod tests {
|
|||
assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_loadu_epi8() {
|
||||
let src = _mm256_set1_epi8(42);
|
||||
let a = &[
|
||||
|
|
@ -16486,7 +16495,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_loadu_epi8() {
|
||||
let a = &[
|
||||
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
|
||||
|
|
@ -16503,7 +16512,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm256_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 32];
|
||||
let a = &[
|
||||
|
|
@ -16521,7 +16530,7 @@ mod tests {
|
|||
assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_loadu_epi16() {
|
||||
let src = _mm_set1_epi16(42);
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
|
|
@ -16533,7 +16542,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_loadu_epi16() {
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
|
|
@ -16544,7 +16553,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_storeu_epi16() {
|
||||
let mut r = [42_i16; 8];
|
||||
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
|
||||
|
|
@ -16556,7 +16565,7 @@ mod tests {
|
|||
assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_loadu_epi8() {
|
||||
let src = _mm_set1_epi8(42);
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
|
|
@ -16570,7 +16579,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_maskz_loadu_epi8() {
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
|
|
@ -16581,7 +16590,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
|
||||
#[simd_test(enable = "avx512bw,avx512vl")]
|
||||
unsafe fn test_mm_mask_storeu_epi8() {
|
||||
let mut r = [42_i8; 16];
|
||||
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
|
|
|
|||
|
|
@ -20940,7 +20940,12 @@ pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravd))]
|
||||
pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
|
||||
simd_shr(a.as_i32x16(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21035,7 +21040,12 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
|
||||
simd_shr(a.as_i64x8(), count).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21074,7 +21084,12 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
|
||||
unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x4();
|
||||
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
|
||||
simd_shr(a.as_i64x4(), count).as_m256i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21113,7 +21128,12 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsravq))]
|
||||
pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
|
||||
unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x2();
|
||||
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
|
||||
simd_shr(a.as_i64x2(), count).as_m128i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21692,7 +21712,12 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvd))]
|
||||
pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x16::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21787,7 +21812,12 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvd))]
|
||||
pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
|
||||
unsafe {
|
||||
let count = count.as_u32x16();
|
||||
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
|
||||
let count = simd_select(no_overflow, count, u32x16::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21882,7 +21912,12 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsllvq))]
|
||||
pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x8::ZERO);
|
||||
simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -21977,7 +22012,12 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
|
|||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
#[cfg_attr(test, assert_instr(vpsrlvq))]
|
||||
pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
|
||||
unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
|
||||
unsafe {
|
||||
let count = count.as_u64x8();
|
||||
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
|
||||
let count = simd_select(no_overflow, count, u64x8::ZERO);
|
||||
simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
|
||||
}
|
||||
}
|
||||
|
||||
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
|
|
@ -34715,7 +34755,8 @@ pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
|
||||
transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
|
||||
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34741,7 +34782,8 @@ pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
|
||||
transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
|
||||
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34767,7 +34809,8 @@ pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
|
||||
transmute(loadups_512(mem_addr, src.as_f32x16(), k))
|
||||
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -34793,7 +34836,8 @@ pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m51
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
|
||||
transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
|
||||
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -34819,7 +34863,8 @@ pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
|
||||
transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
|
||||
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34845,7 +34890,8 @@ pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
|
||||
transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
|
||||
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34871,7 +34917,8 @@ pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
|
||||
transmute(loadups_256(mem_addr, src.as_f32x8(), k))
|
||||
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -34897,7 +34944,8 @@ pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
|
||||
transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
|
||||
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -34923,7 +34971,8 @@ pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
|
||||
transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
|
||||
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34949,7 +34998,8 @@ pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
|
||||
transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
|
||||
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -34975,7 +35025,8 @@ pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
|
||||
transmute(loadups_128(mem_addr, src.as_f32x4(), k))
|
||||
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35001,7 +35052,8 @@ pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
|
||||
transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
|
||||
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35027,7 +35079,8 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
|
||||
transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
|
||||
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35053,7 +35106,8 @@ pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
|
||||
transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
|
||||
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35079,7 +35133,8 @@ pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m5
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
|
||||
transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
|
||||
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35105,7 +35160,8 @@ pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
|
||||
transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
|
||||
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35131,7 +35187,8 @@ pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
|
||||
transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
|
||||
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35157,7 +35214,8 @@ pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m2
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
|
||||
transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
|
||||
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35183,7 +35241,8 @@ pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m2
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
|
||||
transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
|
||||
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35209,7 +35268,8 @@ pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
|
||||
transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
|
||||
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35235,7 +35295,8 @@ pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
|
||||
transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
|
||||
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 32-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35261,7 +35322,8 @@ pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
|
||||
transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
|
||||
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
|
||||
}
|
||||
|
||||
/// Load packed 64-bit integers from memory into dst using zeromask k
|
||||
|
|
@ -35287,7 +35349,8 @@ pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
|
||||
transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
|
||||
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
|
||||
}
|
||||
|
||||
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35313,7 +35376,8 @@ pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
|
||||
transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
|
||||
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
|
||||
}
|
||||
|
||||
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
|
||||
|
|
@ -35426,7 +35490,8 @@ pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
|
||||
storedqu32_512(mem_addr, a.as_i32x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35438,7 +35503,8 @@ pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: _
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
|
||||
storedqu64_512(mem_addr, a.as_i64x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35450,7 +35516,8 @@ pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
|
||||
storeups_512(mem_addr, a.as_f32x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35462,7 +35529,8 @@ pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m5
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
|
||||
storeupd_512(mem_addr, a.as_f64x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
|
||||
}
|
||||
|
||||
/// Store packed 32-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35474,7 +35542,8 @@ pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m51
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
|
||||
storedqu32_256(mem_addr, a.as_i32x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35486,7 +35555,8 @@ pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
|
||||
storedqu64_256(mem_addr, a.as_i64x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35498,7 +35568,8 @@ pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
|
||||
storeups_256(mem_addr, a.as_f32x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35510,7 +35581,8 @@ pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m25
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
|
||||
storeupd_256(mem_addr, a.as_f64x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
|
||||
}
|
||||
|
||||
/// Store packed 32-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35522,7 +35594,8 @@ pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m25
|
|||
#[cfg_attr(test, assert_instr(vmovdqu32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
|
||||
storedqu32_128(mem_addr, a.as_i32x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35534,7 +35607,8 @@ pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m12
|
|||
#[cfg_attr(test, assert_instr(vmovdqu64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
|
||||
storedqu64_128(mem_addr, a.as_i64x2(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35546,7 +35620,8 @@ pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m12
|
|||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
|
||||
storeups_128(mem_addr, a.as_f32x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35558,7 +35633,8 @@ pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128)
|
|||
#[cfg_attr(test, assert_instr(vmovupd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
|
||||
storeupd_128(mem_addr, a.as_f64x2(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
|
||||
}
|
||||
|
||||
/// Store packed 32-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35570,7 +35646,8 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
|
||||
storedqa32_512(mem_addr, a.as_i32x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35582,7 +35659,8 @@ pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
|
||||
storedqa64_512(mem_addr, a.as_i64x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35594,7 +35672,8 @@ pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
|
||||
storeaps_512(mem_addr, a.as_f32x16(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35606,7 +35685,8 @@ pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m51
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
|
||||
storeapd_512(mem_addr, a.as_f64x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
|
||||
}
|
||||
|
||||
/// Store packed 32-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35618,7 +35698,8 @@ pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
|
||||
storedqa32_256(mem_addr, a.as_i32x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35630,7 +35711,8 @@ pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
|
||||
storedqa64_256(mem_addr, a.as_i64x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35642,7 +35724,8 @@ pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
|
||||
storeaps_256(mem_addr, a.as_f32x8(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35654,7 +35737,8 @@ pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
|
||||
storeapd_256(mem_addr, a.as_f64x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
|
||||
}
|
||||
|
||||
/// Store packed 32-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35666,7 +35750,8 @@ pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256
|
|||
#[cfg_attr(test, assert_instr(vmovdqa32))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
|
||||
storedqa32_128(mem_addr, a.as_i32x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
|
||||
}
|
||||
|
||||
/// Store packed 64-bit integers from a into memory using writemask k.
|
||||
|
|
@ -35678,7 +35763,8 @@ pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128
|
|||
#[cfg_attr(test, assert_instr(vmovdqa64))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
|
||||
storedqa64_128(mem_addr, a.as_i64x2(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
|
||||
}
|
||||
|
||||
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35690,7 +35776,8 @@ pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128
|
|||
#[cfg_attr(test, assert_instr(vmovaps))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
|
||||
storeaps_128(mem_addr, a.as_f32x4(), mask)
|
||||
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
|
||||
}
|
||||
|
||||
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
|
||||
|
|
@ -35702,7 +35789,8 @@ pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
|
|||
#[cfg_attr(test, assert_instr(vmovapd))]
|
||||
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
|
||||
pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
|
||||
storeapd_128(mem_addr, a.as_f64x2(), mask)
|
||||
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
|
||||
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
|
||||
}
|
||||
|
||||
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
|
||||
|
|
@ -42833,15 +42921,6 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
|
||||
fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psllv.d.512"]
|
||||
fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.d.512"]
|
||||
fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psllv.q.512"]
|
||||
fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.psrlv.q.512"]
|
||||
fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psll.d.512"]
|
||||
fn vpslld(a: i32x16, count: i32x4) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.psrl.d.512"]
|
||||
|
|
@ -42861,16 +42940,6 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.psra.q.128"]
|
||||
fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.d.512"]
|
||||
fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.512"]
|
||||
fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.256"]
|
||||
fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.psrav.q.128"]
|
||||
fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
|
||||
fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
|
||||
#[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
|
||||
|
|
@ -43109,106 +43178,6 @@ unsafe extern "C" {
|
|||
#[link_name = "llvm.x86.avx512.vcomi.sd"]
|
||||
fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
|
||||
fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
|
||||
fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
|
||||
fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
|
||||
fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
|
||||
fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
|
||||
fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
|
||||
fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
|
||||
fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
|
||||
fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
|
||||
fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
|
||||
fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
|
||||
fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.load.d.128"]
|
||||
fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.q.128"]
|
||||
fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.ps.128"]
|
||||
fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.pd.128"]
|
||||
fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.d.256"]
|
||||
fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.q.256"]
|
||||
fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.ps.256"]
|
||||
fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.pd.256"]
|
||||
fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.d.512"]
|
||||
fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.q.512"]
|
||||
fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.ps.512"]
|
||||
fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
|
||||
#[link_name = "llvm.x86.avx512.mask.load.pd.512"]
|
||||
fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
|
||||
fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
|
||||
fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
|
||||
fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
|
||||
fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
|
||||
fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
|
||||
fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
|
||||
fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
|
||||
fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
|
||||
fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
|
||||
fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
|
||||
fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
|
||||
fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.store.d.128"]
|
||||
fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.q.128"]
|
||||
fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.ps.128"]
|
||||
fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.pd.128"]
|
||||
fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.d.256"]
|
||||
fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.q.256"]
|
||||
fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.ps.256"]
|
||||
fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.pd.256"]
|
||||
fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.d.512"]
|
||||
fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.q.512"]
|
||||
fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.ps.512"]
|
||||
fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
|
||||
#[link_name = "llvm.x86.avx512.mask.store.pd.512"]
|
||||
fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
|
||||
|
||||
#[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
|
||||
fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
|
||||
#[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
|
||||
|
|
@ -46240,11 +46209,25 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_ternarylogic_epi32() {
|
||||
let a = _mm512_set1_epi32(1 << 2);
|
||||
let b = _mm512_set1_epi32(1 << 1);
|
||||
let c = _mm512_set1_epi32(1 << 0);
|
||||
let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
|
||||
let e = _mm512_set1_epi32(0);
|
||||
use core::intrinsics::simd::simd_xor;
|
||||
|
||||
let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
|
||||
let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
|
||||
let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);
|
||||
|
||||
// Identity of A.
|
||||
let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
|
||||
assert_eq_m512i(r, a);
|
||||
|
||||
// Bitwise xor.
|
||||
let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
|
||||
let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
|
||||
assert_eq_m512i(r, e);
|
||||
assert_eq_m512i(r, simd_xor(simd_xor(a, b), c));
|
||||
|
||||
// Majority (2 or more bits set).
|
||||
let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
|
||||
let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -46274,11 +46257,27 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_ternarylogic_epi32() {
|
||||
let a = _mm256_set1_epi32(1 << 2);
|
||||
let b = _mm256_set1_epi32(1 << 1);
|
||||
let c = _mm256_set1_epi32(1 << 0);
|
||||
let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
|
||||
let e = _mm256_set1_epi32(0);
|
||||
use core::intrinsics::simd::simd_xor;
|
||||
|
||||
let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);
|
||||
|
||||
let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
|
||||
let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
|
||||
let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);
|
||||
|
||||
// Identity of A.
|
||||
let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
|
||||
assert_eq_m256i(r, a);
|
||||
|
||||
// Bitwise xor.
|
||||
let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
|
||||
let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
|
||||
assert_eq_m256i(r, e);
|
||||
assert_eq_m256i(r, simd_xor(simd_xor(a, b), c));
|
||||
|
||||
// Majority (2 or more bits set).
|
||||
let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
|
||||
let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -46308,11 +46307,25 @@ mod tests {
|
|||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_ternarylogic_epi32() {
|
||||
let a = _mm_set1_epi32(1 << 2);
|
||||
let b = _mm_set1_epi32(1 << 1);
|
||||
let c = _mm_set1_epi32(1 << 0);
|
||||
let r = _mm_ternarylogic_epi32::<8>(a, b, c);
|
||||
let e = _mm_set1_epi32(0);
|
||||
use core::intrinsics::simd::simd_xor;
|
||||
|
||||
let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
|
||||
let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
|
||||
let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);
|
||||
|
||||
// Identity of A.
|
||||
let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
|
||||
assert_eq_m128i(r, a);
|
||||
|
||||
// Bitwise xor.
|
||||
let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
|
||||
let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
|
||||
assert_eq_m128i(r, e);
|
||||
assert_eq_m128i(r, simd_xor(simd_xor(a, b), c));
|
||||
|
||||
// Majority (2 or more bits set).
|
||||
let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
|
||||
let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -56063,7 +56076,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_kortest_mask16_u8() {
|
||||
let a: __mmask16 = 0b0110100101101001;
|
||||
let b: __mmask16 = 0b1011011010110110;
|
||||
|
|
@ -56073,7 +56086,7 @@ mod tests {
|
|||
assert_eq!(all_ones, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_kortestc_mask16_u8() {
|
||||
let a: __mmask16 = 0b0110100101101001;
|
||||
let b: __mmask16 = 0b1011011010110110;
|
||||
|
|
@ -56081,7 +56094,7 @@ mod tests {
|
|||
assert_eq!(r, 1);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_kortestz_mask16_u8() {
|
||||
let a: __mmask16 = 0b0110100101101001;
|
||||
let b: __mmask16 = 0b1011011010110110;
|
||||
|
|
@ -56089,7 +56102,7 @@ mod tests {
|
|||
assert_eq!(r, 0);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_kshiftli_mask16() {
|
||||
let a: __mmask16 = 0b1001011011000011;
|
||||
let r = _kshiftli_mask16::<3>(a);
|
||||
|
|
@ -56109,7 +56122,7 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512dq")]
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_kshiftri_mask16() {
|
||||
let a: __mmask16 = 0b1010100100111100;
|
||||
let r = _kshiftri_mask16::<3>(a);
|
||||
|
|
@ -57370,7 +57383,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_set1_epi32() {
|
||||
let a: i32 = 11;
|
||||
let r = _mm256_maskz_set1_epi32(0, a);
|
||||
|
|
@ -57391,7 +57404,7 @@ mod tests {
|
|||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_set1_epi32() {
|
||||
let a: i32 = 11;
|
||||
let r = _mm_maskz_set1_epi32(0, a);
|
||||
|
|
|
|||
|
|
@ -20766,7 +20766,7 @@ mod tests {
|
|||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_mask_fmsub_round_sh() {
|
||||
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
|
||||
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
|
||||
|
|
@ -20783,7 +20783,7 @@ mod tests {
|
|||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_mask3_fmsub_round_sh() {
|
||||
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
|
||||
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
|
||||
|
|
@ -20800,7 +20800,7 @@ mod tests {
|
|||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm_maskz_fmsub_round_sh() {
|
||||
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
|
||||
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
|
||||
|
|
@ -24529,7 +24529,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvtepi32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let src = _mm256_set_ph(
|
||||
|
|
@ -24542,7 +24542,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvtepi32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
|
||||
|
|
@ -24552,7 +24552,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_cvt_roundepi32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
|
||||
|
|
@ -24562,7 +24562,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let src = _mm256_set_ph(
|
||||
|
|
@ -24579,7 +24579,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
|
||||
|
|
@ -24658,7 +24658,7 @@ mod tests {
|
|||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_cvtepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_cvtepu32_ph(a);
|
||||
|
|
@ -24668,7 +24668,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvtepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let src = _mm256_set_ph(
|
||||
|
|
@ -24681,7 +24681,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvtepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
|
||||
|
|
@ -24691,7 +24691,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_cvt_roundepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
|
||||
|
|
@ -24701,7 +24701,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let src = _mm256_set_ph(
|
||||
|
|
@ -24719,7 +24719,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
|
||||
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
|
||||
|
|
@ -25006,7 +25006,7 @@ mod tests {
|
|||
assert_eq_m128h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_cvtxps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
@ -25018,7 +25018,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvtxps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
@ -25033,7 +25033,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvtxps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
@ -25045,7 +25045,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_cvtx_roundps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
@ -25057,7 +25057,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_mask_cvtx_roundps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
@ -25077,7 +25077,7 @@ mod tests {
|
|||
assert_eq_m256h(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512fp16")]
|
||||
#[simd_test(enable = "avx512fp16,avx512vl")]
|
||||
unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
|
||||
let a = _mm512_set_ps(
|
||||
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
|
||||
|
|
|
|||
|
|
@ -64,16 +64,16 @@ mod tests {
|
|||
use crate::core_arch::x86::*;
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
unsafe fn test_rdtsc() {
|
||||
let r = _rdtsc();
|
||||
#[test]
|
||||
fn test_rdtsc() {
|
||||
let r = unsafe { _rdtsc() };
|
||||
assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse2")]
|
||||
unsafe fn test_rdtscp() {
|
||||
#[test]
|
||||
fn test_rdtscp() {
|
||||
let mut aux = 0;
|
||||
let r = __rdtscp(&mut aux);
|
||||
let r = unsafe { __rdtscp(&mut aux) };
|
||||
assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3052,8 +3052,9 @@ mod tests {
|
|||
assert_eq_m128(r, _mm_set1_ps(0.0));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "sse")]
|
||||
unsafe fn test_MM_SHUFFLE() {
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_MM_SHUFFLE() {
|
||||
assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
|
||||
assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
|
||||
assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
use crate::core_arch::{simd::*, x86::*};
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
|
|
@ -242,6 +244,206 @@ pub unsafe fn _tile_cmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
|
|||
tcmmrlfp16ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and BF8 (8-bit E5M2)
|
||||
/// floating-point elements in tile b, accumulating the intermediate single-precision
|
||||
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
|
||||
/// back to tile dst.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0, 1, 2)]
|
||||
#[target_feature(enable = "amx-fp8")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tdpbf8ps, DST = 0, A = 1, B = 2)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_dpbf8ps<const DST: i32, const A: i32, const B: i32>() {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
static_assert_uimm_bits!(A, 3);
|
||||
static_assert_uimm_bits!(B, 3);
|
||||
tdpbf8ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and HF8
|
||||
/// (8-bit E4M3) floating-point elements in tile b, accumulating the intermediate single-precision
|
||||
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
|
||||
/// back to tile dst.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0, 1, 2)]
|
||||
#[target_feature(enable = "amx-fp8")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tdpbhf8ps, DST = 0, A = 1, B = 2)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_dpbhf8ps<const DST: i32, const A: i32, const B: i32>() {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
static_assert_uimm_bits!(A, 3);
|
||||
static_assert_uimm_bits!(B, 3);
|
||||
tdpbhf8ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and BF8
|
||||
/// (8-bit E5M2) floating-point elements in tile b, accumulating the intermediate single-precision
|
||||
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
|
||||
/// back to tile dst.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0, 1, 2)]
|
||||
#[target_feature(enable = "amx-fp8")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tdphbf8ps, DST = 0, A = 1, B = 2)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_dphbf8ps<const DST: i32, const A: i32, const B: i32>() {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
static_assert_uimm_bits!(A, 3);
|
||||
static_assert_uimm_bits!(B, 3);
|
||||
tdphbf8ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and HF8 (8-bit E4M3)
|
||||
/// floating-point elements in tile b, accumulating the intermediate single-precision
|
||||
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
|
||||
/// back to tile dst.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0, 1, 2)]
|
||||
#[target_feature(enable = "amx-fp8")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tdphf8ps, DST = 0, A = 1, B = 2)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_dphf8ps<const DST: i32, const A: i32, const B: i32>() {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
static_assert_uimm_bits!(A, 3);
|
||||
static_assert_uimm_bits!(B, 3);
|
||||
tdphf8ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Load tile rows from memory specified by base address and stride into destination tile dst
|
||||
/// using the tile configuration previously configured via _tile_loadconfig.
|
||||
/// Additionally, this intrinsic indicates the source memory location is likely to become
|
||||
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
|
||||
/// before it is written, assuming it is ever written in the future.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-movrs")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tileloaddrs, DST = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
tileloaddrs64(DST as i8, base, stride);
|
||||
}
|
||||
|
||||
/// Load tile rows from memory specified by base address and stride into destination tile dst
|
||||
/// using the tile configuration previously configured via _tile_loadconfig.
|
||||
/// Provides a hint to the implementation that the data would be reused but does not need
|
||||
/// to be resident in the nearest cache levels.
|
||||
/// Additionally, this intrinsic indicates the source memory location is likely to become
|
||||
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
|
||||
/// before it is written, assuming it is ever written in the future.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-movrs")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tileloaddrst1, DST = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_stream_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
tileloaddrst164(DST as i8, base, stride);
|
||||
}
|
||||
|
||||
/// Perform matrix multiplication of two tiles a and b, containing packed single precision (32-bit)
|
||||
/// floating-point elements, which are converted to TF32 (tensor-float32) format, and accumulate the
|
||||
/// results into a packed single precision tile.
|
||||
/// For each possible combination of (row of a, column of b), it performs
|
||||
/// - convert to TF32
|
||||
/// - multiply the corresponding elements of a and b
|
||||
/// - accumulate the results into the corresponding row and column of dst using round-to-nearest-even
|
||||
/// rounding mode.
|
||||
/// Output FP32 denormals are always flushed to zero, input single precision denormals are always
|
||||
/// handled and *not* treated as zero.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0, 1, 2)]
|
||||
#[target_feature(enable = "amx-tf32")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tmmultf32ps, DST = 0, A = 1, B = 2)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_mmultf32ps<const DST: i32, const A: i32, const B: i32>() {
|
||||
static_assert_uimm_bits!(DST, 3);
|
||||
static_assert_uimm_bits!(A, 3);
|
||||
static_assert_uimm_bits!(B, 3);
|
||||
tmmultf32ps(DST as i8, A as i8, B as i8);
|
||||
}
|
||||
|
||||
/// Moves a row from a tile register to a zmm register, converting the packed 32-bit signed integer
|
||||
/// elements to packed single-precision (32-bit) floating-point elements.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-avx512,avx10.2")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tcvtrowd2ps, TILE = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_cvtrowd2ps<const TILE: i32>(row: u32) -> __m512 {
|
||||
static_assert_uimm_bits!(TILE, 3);
|
||||
tcvtrowd2ps(TILE as i8, row).as_m512()
|
||||
}
|
||||
|
||||
/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
|
||||
/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
|
||||
/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-avx512,avx10.2")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tcvtrowps2phh, TILE = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_cvtrowps2phh<const TILE: i32>(row: u32) -> __m512h {
|
||||
static_assert_uimm_bits!(TILE, 3);
|
||||
tcvtrowps2phh(TILE as i8, row).as_m512h()
|
||||
}
|
||||
|
||||
/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
|
||||
/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
|
||||
/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-avx512,avx10.2")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tcvtrowps2phl, TILE = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_cvtrowps2phl<const TILE: i32>(row: u32) -> __m512h {
|
||||
static_assert_uimm_bits!(TILE, 3);
|
||||
tcvtrowps2phl(TILE as i8, row).as_m512h()
|
||||
}
|
||||
|
||||
/// Moves one row of tile data into a zmm vector register
|
||||
#[inline]
|
||||
#[rustc_legacy_const_generics(0)]
|
||||
#[target_feature(enable = "amx-avx512,avx10.2")]
|
||||
#[cfg_attr(
|
||||
all(test, any(target_os = "linux", target_env = "msvc")),
|
||||
assert_instr(tilemovrow, TILE = 0)
|
||||
)]
|
||||
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
|
||||
pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
|
||||
static_assert_uimm_bits!(TILE, 3);
|
||||
tilemovrow(TILE as i8, row).as_m512i()
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
unsafe extern "C" {
|
||||
#[link_name = "llvm.x86.ldtilecfg"]
|
||||
|
|
@ -274,13 +476,35 @@ unsafe extern "C" {
|
|||
fn tcmmimfp16ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tcmmrlfp16ps"]
|
||||
fn tcmmrlfp16ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tdpbf8ps"]
|
||||
fn tdpbf8ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tdpbhf8ps"]
|
||||
fn tdpbhf8ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tdphbf8ps"]
|
||||
fn tdphbf8ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tdphf8ps"]
|
||||
fn tdphf8ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tileloaddrs64"]
|
||||
fn tileloaddrs64(dst: i8, base: *const u8, stride: usize);
|
||||
#[link_name = "llvm.x86.tileloaddrst164"]
|
||||
fn tileloaddrst164(dst: i8, base: *const u8, stride: usize);
|
||||
#[link_name = "llvm.x86.tmmultf32ps"]
|
||||
fn tmmultf32ps(dst: i8, a: i8, b: i8);
|
||||
#[link_name = "llvm.x86.tcvtrowd2ps"]
|
||||
fn tcvtrowd2ps(tile: i8, row: u32) -> f32x16;
|
||||
#[link_name = "llvm.x86.tcvtrowps2phh"]
|
||||
fn tcvtrowps2phh(tile: i8, row: u32) -> f16x32;
|
||||
#[link_name = "llvm.x86.tcvtrowps2phl"]
|
||||
fn tcvtrowps2phl(tile: i8, row: u32) -> f16x32;
|
||||
#[link_name = "llvm.x86.tilemovrow"]
|
||||
fn tilemovrow(tile: i8, row: u32) -> i32x16;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::core_arch::x86::_mm_cvtness_sbh;
|
||||
use crate::core_arch::x86_64::*;
|
||||
use core::mem::transmute;
|
||||
use core::{array, mem::transmute};
|
||||
use stdarch_test::simd_test;
|
||||
#[cfg(target_os = "linux")]
|
||||
use syscalls::{Sysno, syscall};
|
||||
|
|
@ -619,4 +843,230 @@ mod tests {
|
|||
_tile_release();
|
||||
assert_eq!(res, [[0f32; 16]; 16]);
|
||||
}
|
||||
|
||||
const BF8_ONE: u8 = 0x3c;
|
||||
const BF8_TWO: u8 = 0x40;
|
||||
const HF8_ONE: u8 = 0x38;
|
||||
const HF8_TWO: u8 = 0x40;
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dpbf8ps() {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dpbhf8ps() {
|
||||
_init_amx();
|
||||
let ones = [BF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dpbhf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dphbf8ps() {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [BF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphbf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-fp8")]
|
||||
unsafe fn test_tile_dphf8ps() {
|
||||
_init_amx();
|
||||
let ones = [HF8_ONE; 1024];
|
||||
let twos = [HF8_TWO; 1024];
|
||||
let mut res = [[0.0_f32; 16]; 16];
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(&ones as *const u8, 64);
|
||||
_tile_loadd::<2>(&twos as *const u8, 64);
|
||||
_tile_dphf8ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
assert_eq!(res, [[128.0_f32; 16]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-movrs")]
|
||||
unsafe fn test_tile_loaddrs() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-movrs")]
|
||||
unsafe fn test_tile_stream_loaddrs() {
|
||||
_init_amx();
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
let mat = [1_i8; 1024];
|
||||
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
|
||||
let mut out = [[0_i8; 64]; 16];
|
||||
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
|
||||
_tile_release();
|
||||
assert_eq!(out, [[1; 64]; 16]);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_movrow() {
|
||||
_init_amx();
|
||||
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_movrow::<0>(i);
|
||||
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowd2ps() {
|
||||
_init_amx();
|
||||
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowd2ps::<0>(i);
|
||||
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowps2phh() {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phh::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-avx512,avx10.2")]
|
||||
unsafe fn test_tile_cvtrowps2phl() {
|
||||
_init_amx();
|
||||
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
config.colsb[0] = 64;
|
||||
config.rows[0] = 16;
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_loadd::<0>(array.as_ptr().cast(), 64);
|
||||
for i in 0..16 {
|
||||
let row = _tile_cvtrowps2phl::<0>(i);
|
||||
assert_eq!(
|
||||
*row.as_f16x32().as_array(),
|
||||
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[simd_test(enable = "amx-tf32")]
|
||||
unsafe fn test_tile_mmultf32ps() {
|
||||
_init_amx();
|
||||
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
|
||||
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
|
||||
let mut res = [[0.0; 16]; 16];
|
||||
|
||||
let mut config = __tilecfg::default();
|
||||
config.palette = 1;
|
||||
(0..=2).for_each(|i| {
|
||||
config.colsb[i] = 64;
|
||||
config.rows[i] = 16;
|
||||
});
|
||||
_tile_loadconfig(config.as_ptr());
|
||||
_tile_zero::<0>();
|
||||
_tile_loadd::<1>(a.as_ptr().cast(), 64);
|
||||
_tile_loadd::<2>(b.as_ptr().cast(), 64);
|
||||
_tile_mmultf32ps::<0, 1, 2>();
|
||||
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
|
||||
_tile_release();
|
||||
|
||||
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
|
||||
assert_eq!(res, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6453,6 +6453,7 @@ mod tests {
|
|||
assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set1_epi64() {
|
||||
let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
assert_eq_m512i(r, _mm512_set1_epi64(2));
|
||||
|
|
@ -6464,6 +6465,7 @@ mod tests {
|
|||
assert_eq_m512d(expected, _mm512_set1_pd(2.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set4_epi64() {
|
||||
let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
|
||||
assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1));
|
||||
|
|
@ -6475,6 +6477,7 @@ mod tests {
|
|||
assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_setr4_epi64() {
|
||||
let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
|
||||
assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4));
|
||||
|
|
@ -7335,6 +7338,7 @@ mod tests {
|
|||
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmpneq_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
|
||||
|
|
@ -9685,7 +9689,7 @@ mod tests {
|
|||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_permutex_epi64() {
|
||||
let a = _mm256_set_epi64x(3, 2, 1, 0);
|
||||
let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
|
||||
|
|
|
|||
|
|
@ -48,8 +48,12 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
|
|||
.expect("Error parsing input file");
|
||||
|
||||
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
intrinsics.dedup();
|
||||
|
||||
let mut intrinsics = intrinsics
|
||||
let sample_percentage: usize = cli_options.sample_percentage as usize;
|
||||
let sample_size = (intrinsics.len() * sample_percentage) / 100;
|
||||
|
||||
let intrinsics = intrinsics
|
||||
.into_iter()
|
||||
// Not sure how we would compare intrinsic that returns void.
|
||||
.filter(|i| i.results.kind() != TypeKind::Void)
|
||||
|
|
@ -61,8 +65,8 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
|
|||
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
|
||||
.filter(|i| !cli_options.skip.contains(&i.name))
|
||||
.filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()]))
|
||||
.take(sample_size)
|
||||
.collect::<Vec<_>>();
|
||||
intrinsics.dedup();
|
||||
|
||||
Self {
|
||||
intrinsics,
|
||||
|
|
|
|||
|
|
@ -86,6 +86,10 @@ pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target:
|
|||
println!("Failed to run rust program for intrinsic {intrinsic}")
|
||||
}
|
||||
});
|
||||
println!("{} differences found", intrinsics.len());
|
||||
println!(
|
||||
"{} differences found (tested {} intrinsics)",
|
||||
intrinsics.len(),
|
||||
intrinsic_name_list.len()
|
||||
);
|
||||
intrinsics.is_empty()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,12 +79,16 @@ pub trait SupportedArchitectureTest {
|
|||
trace!("compiling mod_{i}.cpp");
|
||||
if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
|
||||
let compile_output = cpp_compiler
|
||||
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"));
|
||||
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))
|
||||
.map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?;
|
||||
|
||||
assert!(
|
||||
compile_output.status.success(),
|
||||
"{}",
|
||||
String::from_utf8_lossy(&compile_output.stderr)
|
||||
);
|
||||
|
||||
trace!("finished compiling mod_{i}.cpp");
|
||||
if let Err(compile_error) = compile_output {
|
||||
return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}"));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
})
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ fn run(test_environment: impl SupportedArchitectureTest) {
|
|||
if !test_environment.build_rust_file() {
|
||||
std::process::exit(3);
|
||||
}
|
||||
info!("comaparing outputs");
|
||||
info!("comparing outputs");
|
||||
if !test_environment.compare_outputs() {
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ use crate::common::compile_c::CppCompilation;
|
|||
use crate::common::intrinsic::Intrinsic;
|
||||
use crate::common::intrinsic_helpers::TypeKind;
|
||||
use intrinsic::X86IntrinsicType;
|
||||
use itertools::Itertools;
|
||||
use xml_parser::get_xml_intrinsics;
|
||||
|
||||
pub struct X86ArchitectureTest {
|
||||
|
|
@ -44,12 +43,16 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
|
|||
const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
|
||||
|
||||
fn create(cli_options: ProcessedCli) -> Self {
|
||||
let intrinsics =
|
||||
let mut intrinsics =
|
||||
get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
|
||||
|
||||
let sample_percentage: usize = cli_options.sample_percentage as usize;
|
||||
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
intrinsics.dedup_by(|a, b| a.name == b.name);
|
||||
|
||||
let mut intrinsics = intrinsics
|
||||
let sample_percentage: usize = cli_options.sample_percentage as usize;
|
||||
let sample_size = (intrinsics.len() * sample_percentage) / 100;
|
||||
|
||||
let intrinsics = intrinsics
|
||||
.into_iter()
|
||||
// Not sure how we would compare intrinsic that returns void.
|
||||
.filter(|i| i.results.kind() != TypeKind::Void)
|
||||
|
|
@ -61,13 +64,9 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
|
|||
.filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
|
||||
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
|
||||
.filter(|i| !cli_options.skip.contains(&i.name))
|
||||
.unique_by(|i| i.name.clone())
|
||||
.take(sample_size)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let sample_size = (intrinsics.len() * sample_percentage) / 100;
|
||||
intrinsics.truncate(sample_size);
|
||||
|
||||
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
Self {
|
||||
intrinsics: intrinsics,
|
||||
cli_options: cli_options,
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ pub(crate) fn disassemble_myself() -> HashSet<Function> {
|
|||
let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
|
||||
let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") {
|
||||
// Target features need to be enabled for LLVM objdump on Darwin ARM64
|
||||
vec!["--mattr=+v8.6a,+crypto,+tme"]
|
||||
vec!["--mattr=+v8.6a,+crypto"]
|
||||
} else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
|
||||
vec!["--mattr=+zk,+zks,+zbc,+zbb"]
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -444,7 +444,6 @@ fn verify_all_signatures() {
|
|||
&& !rust.file.ends_with("v6.rs\"")
|
||||
&& !rust.file.ends_with("v7.rs\"")
|
||||
&& !rust.file.ends_with("v8.rs\"")
|
||||
&& !rust.file.ends_with("tme.rs\"")
|
||||
&& !rust.file.ends_with("mte.rs\"")
|
||||
&& !rust.file.ends_with("ex.rs\"")
|
||||
&& !skip_intrinsic_verify.contains(&rust.name)
|
||||
|
|
|
|||
|
|
@ -304,6 +304,14 @@ fn verify_all_signatures() {
|
|||
if feature.contains("sse4a") || feature.contains("tbm") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// FIXME: these have not been added to Intrinsics Guide yet
|
||||
if ["amx-avx512", "amx-fp8", "amx-movrs", "amx-tf32"]
|
||||
.iter()
|
||||
.any(|f| feature.contains(f))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let intel = match map.remove(rust.name) {
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
73e6c9ebd9123154a196300ef58e30ec8928e74e
|
||||
8401398e1f14a24670ee1a3203713dc2f0f8b3a8
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue