Auto merge of #149118 - folkertdev:stdarch-sync-2025-11-19, r=tgross35

stdarch subtree update

Subtree update of `stdarch` to 50134e10cb.

Created using https://github.com/rust-lang/josh-sync.

The only interesting commit is the final one, which enables the `avx10_target_feature` feature in the standard library, because it is now used in `stdarch`.

r? `@sayantn` (or whoever, this is just a straightforward sync)
This commit is contained in:
bors 2025-11-20 10:08:24 +00:00
commit 2c0f4860cc
29 changed files with 1031 additions and 692 deletions

View file

@ -195,6 +195,7 @@
// tidy-alphabetical-start
#![feature(aarch64_unstable_target_feature)]
#![feature(arm_target_feature)]
#![feature(avx10_target_feature)]
#![feature(hexagon_target_feature)]
#![feature(loongarch_target_feature)]
#![feature(mips_target_feature)]

View file

@ -16,7 +16,7 @@ jobs:
# https://rust-lang.zulipchat.com/#narrow/channel/208962-t-libs.2Fstdarch/topic/Subtree.20sync.20automation/with/528461782
zulip-stream-id: 208962
zulip-bot-email: "stdarch-ci-bot@rust-lang.zulipchat.com"
pr-base-branch: master
pr-base-branch: main
branch-name: rustc-pull
secrets:
zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}

View file

@ -15,5 +15,4 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \
OBJDUMP=aarch64-linux-gnu-objdump \
STDARCH_TEST_SKIP_FEATURE=tme
OBJDUMP=aarch64-linux-gnu-objdump

View file

@ -27,4 +27,3 @@ ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc"
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc"
ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}"
ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump"
ENV STDARCH_TEST_SKIP_FEATURE=tme

View file

@ -36,6 +36,7 @@ run() {
--env NORUN \
--env RUSTFLAGS \
--env CARGO_UNSTABLE_BUILD_STD \
--env TEST_SAMPLE_INTRINSICS_PERCENTAGE \
--volume "${HOME}/.cargo":/cargo \
--volume "$(rustc --print sysroot)":/rust:ro \
--volume "$(pwd)":/checkout:ro \

View file

@ -51,6 +51,7 @@ case ${TARGET} in
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER}"
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
;;
aarch64_be-unknown-linux-gnu*)
@ -58,6 +59,7 @@ case ${TARGET} in
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}"
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
;;
armv7-unknown-linux-gnueabihf*)
@ -65,6 +67,7 @@ case ${TARGET} in
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER}"
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}"
;;
x86_64-unknown-linux-gnu*)
@ -72,7 +75,7 @@ case ${TARGET} in
TEST_CXX_COMPILER="clang++"
TEST_RUNNER="${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}"
TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt
TEST_SAMPLE_INTRINSICS_PERCENTAGE=5
: "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=5}"
;;
*)
;;
@ -82,23 +85,25 @@ esac
# Arm specific
case "${TARGET}" in
aarch64-unknown-linux-gnu*|armv7-unknown-linux-gnueabihf*)
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
--runner "${TEST_RUNNER}" \
--cppcompiler "${TEST_CXX_COMPILER}" \
--skip "${TEST_SKIP_INTRINSICS}" \
--target "${TARGET}"
--target "${TARGET}" \
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}"
;;
aarch64_be-unknown-linux-gnu*)
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=warn \
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" RUST_LOG=info \
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
--bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \
--runner "${TEST_RUNNER}" \
--cppcompiler "${TEST_CXX_COMPILER}" \
--skip "${TEST_SKIP_INTRINSICS}" \
--target "${TARGET}" \
--sample-percentage "${TEST_SAMPLE_INTRINSICS_PERCENTAGE}" \
--linker "${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER}" \
--cxx-toolchain-dir "${AARCH64_BE_TOOLCHAIN}"
;;
@ -109,7 +114,7 @@ case "${TARGET}" in
# Hence the use of `env -u`.
env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \
CPPFLAGS="${TEST_CPPFLAGS}" RUSTFLAGS="${HOST_RUSTFLAGS}" \
RUST_LOG=warn RUST_BACKTRACE=1 \
RUST_LOG=info RUST_BACKTRACE=1 \
cargo run "${INTRINSIC_TEST}" "${PROFILE}" \
--bin intrinsic-test -- intrinsics_data/x86-intel.xml \
--runner "${TEST_RUNNER}" \

View file

@ -3,7 +3,7 @@
The `core::arch` module implements architecture-dependent intrinsics (e.g. SIMD).
# Usage
# Usage
`core::arch` is available as part of `libcore` and it is re-exported by
`libstd`. Prefer using it via `core::arch` or `std::arch` than via this crate.
@ -17,7 +17,7 @@ are:
you need to re-compile it for a non-standard target, please prefer using
`xargo` and re-compiling `libcore`/`libstd` as appropriate instead of using
this crate.
* using some features that might not be available even behind unstable Rust
features. We try to keep these to a minimum. If you need to use some of these
features, please open an issue so that we can expose them in nightly Rust and
@ -34,7 +34,7 @@ are:
* [How to get started][contrib]
* [How to help implement intrinsics][help-implement]
[contrib]: https://github.com/rust-lang/stdarch/blob/master/CONTRIBUTING.md
[contrib]: https://github.com/rust-lang/stdarch/blob/HEAD/CONTRIBUTING.md
[help-implement]: https://github.com/rust-lang/stdarch/issues/40
[i686]: https://rust-lang.github.io/stdarch/i686/core_arch/
[x86_64]: https://rust-lang.github.io/stdarch/x86_64/core_arch/

View file

@ -21,10 +21,6 @@ mod neon;
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
pub use self::neon::*;
mod tme;
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub use self::tme::*;
mod prefetch;
#[unstable(feature = "stdarch_aarch64_prefetch", issue = "117217")]
pub use self::prefetch::*;

View file

@ -1,201 +0,0 @@
//! ARM's Transactional Memory Extensions (TME).
//!
//! This CPU feature is available on Aarch64 - A architecture profile.
//! This feature is in the non-neon feature set. TME specific vendor documentation can
//! be found [TME Intrinsics Introduction][tme_intrinsics_intro].
//!
//! The reference is [ACLE Q4 2019][acle_q4_2019_ref].
//!
//! ACLE has a section for TME extensions and state masks for aborts and failure codes.
//! [ARM A64 Architecture Register Datasheet][a_profile_future] also describes possible failure code scenarios.
//!
//! [acle_q4_2019_ref]: https://static.docs.arm.com/101028/0010/ACLE_2019Q4_release-0010.pdf
//! [tme_intrinsics_intro]: https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics
//! [llvm_aarch64_int]: https://github.com/llvm/llvm-project/commit/a36d31478c182903523e04eb271bbf102bfab2cc#diff-ff24e1c35f4d54f1110ce5d90c709319R626-R646
//! [a_profile_future]: https://static.docs.arm.com/ddi0601/a/SysReg_xml_futureA-2019-04.pdf?_ga=2.116560387.441514988.1590524918-1110153136.1588469296
#[cfg(test)]
use stdarch_test::assert_instr;
unsafe extern "unadjusted" {
#[link_name = "llvm.aarch64.tstart"]
fn aarch64_tstart() -> u64;
#[link_name = "llvm.aarch64.tcommit"]
fn aarch64_tcommit();
#[link_name = "llvm.aarch64.tcancel"]
fn aarch64_tcancel(imm0: u64);
#[link_name = "llvm.aarch64.ttest"]
fn aarch64_ttest() -> u64;
}
/// Transaction successfully started.
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMSTART_SUCCESS: u64 = 0x00_u64;
/// Extraction mask for failure reason
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_REASON: u64 = 0x00007FFF_u64;
/// Transaction retry is possible.
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_RTRY: u64 = 1 << 15;
/// Transaction executed a TCANCEL instruction
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_CNCL: u64 = 1 << 16;
/// Transaction aborted because a conflict occurred
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_MEM: u64 = 1 << 17;
/// Fallback error type for any other reason
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_IMP: u64 = 1 << 18;
/// Transaction aborted because a non-permissible operation was attempted
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_ERR: u64 = 1 << 19;
/// Transaction aborted due to read or write set limit was exceeded
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_SIZE: u64 = 1 << 20;
/// Transaction aborted due to transactional nesting level was exceeded
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_NEST: u64 = 1 << 21;
/// Transaction aborted due to a debug trap.
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_DBG: u64 = 1 << 22;
/// Transaction failed from interrupt
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_INT: u64 = 1 << 23;
/// Indicates a TRIVIAL version of TM is available
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub const _TMFAILURE_TRIVIAL: u64 = 1 << 24;
// NOTE: Tests for these instructions are disabled on MSVC as dumpbin doesn't
// understand these instructions.
/// Starts a new transaction. When the transaction starts successfully the return value is 0.
/// If the transaction fails, all state modifications are discarded and a cause of the failure
/// is encoded in the return value.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tstart))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tstart() -> u64 {
aarch64_tstart()
}
/// Commits the current transaction. For a nested transaction, the only effect is that the
/// transactional nesting depth is decreased. For an outer transaction, the state modifications
/// performed transactionally are committed to the architectural state.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(tcommit))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tcommit() {
aarch64_tcommit()
}
/// Cancels the current transaction and discards all state modifications that were performed transactionally.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(
all(test, not(target_env = "msvc")),
assert_instr(tcancel, IMM16 = 0x0)
)]
#[rustc_legacy_const_generics(0)]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __tcancel<const IMM16: u64>() {
static_assert!(IMM16 <= 65535);
aarch64_tcancel(IMM16);
}
/// Tests if executing inside a transaction. If no transaction is currently executing,
/// the return value is 0. Otherwise, this intrinsic returns the depth of the transaction.
///
/// [ARM TME Intrinsics](https://developer.arm.com/docs/101028/0010/transactional-memory-extension-tme-intrinsics).
#[inline]
#[target_feature(enable = "tme")]
#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(ttest))]
#[unstable(feature = "stdarch_aarch64_tme", issue = "117216")]
pub unsafe fn __ttest() -> u64 {
aarch64_ttest()
}
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::aarch64::*;
const CANCEL_CODE: u64 = (0 | (0x123 & _TMFAILURE_REASON) as u64) as u64;
#[simd_test(enable = "tme")]
unsafe fn test_tstart() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
break;
}
assert_eq!(x, 0);
}
}
#[simd_test(enable = "tme")]
unsafe fn test_tcommit() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcommit();
}
assert_eq!(x, i + 1);
}
}
#[simd_test(enable = "tme")]
unsafe fn test_tcancel() {
let mut x = 0;
for i in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
x += 1;
assert_eq!(x, i + 1);
tme::__tcancel::<CANCEL_CODE>();
break;
}
}
assert_eq!(x, 0);
}
#[simd_test(enable = "tme")]
unsafe fn test_ttest() {
for _ in 0..10 {
let code = tme::__tstart();
if code == _TMSTART_SUCCESS {
if tme::__ttest() == 2 {
tme::__tcancel::<CANCEL_CODE>();
break;
}
}
}
}
}

View file

@ -33,7 +33,8 @@
f16,
aarch64_unstable_target_feature,
bigint_helper_methods,
funnel_shifts
funnel_shifts,
avx10_target_feature
)]
#![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))]
#![deny(clippy::missing_inline_in_public_items)]

View file

@ -163,3 +163,17 @@ macro_rules! simd_extract {
($x:expr, $idx:expr $(,)?) => {{ $crate::intrinsics::simd::simd_extract($x, const { $idx }) }};
($x:expr, $idx:expr, $ty:ty $(,)?) => {{ $crate::intrinsics::simd::simd_extract::<_, $ty>($x, const { $idx }) }};
}
#[allow(unused)]
macro_rules! simd_masked_load {
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
$crate::intrinsics::simd::simd_masked_load::<_, _, _, { $align }>($mask, $ptr, $default)
};
}
#[allow(unused)]
macro_rules! simd_masked_store {
($align:expr, $mask:expr, $ptr:expr, $default:expr) => {
$crate::intrinsics::simd::simd_masked_store::<_, _, _, { $align }>($mask, $ptr, $default)
};
}

View file

@ -1675,7 +1675,8 @@ pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d {
maskloadpd256(mem_addr as *const i8, mask.as_i64x4())
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_pd())
}
/// Stores packed double-precision (64-bit) floating-point elements from `a`
@ -1687,7 +1688,8 @@ pub unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d) {
maskstorepd256(mem_addr as *mut i8, mask.as_i64x4(), a);
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
/// Loads packed double-precision (64-bit) floating-point elements from memory
@ -1700,7 +1702,8 @@ pub unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d)
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
maskloadpd(mem_addr as *const i8, mask.as_i64x2())
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_pd())
}
/// Stores packed double-precision (64-bit) floating-point elements from `a`
@ -1712,7 +1715,8 @@ pub unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d {
#[cfg_attr(test, assert_instr(vmaskmovpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
maskstorepd(mem_addr as *mut i8, mask.as_i64x2(), a);
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
/// Loads packed single-precision (32-bit) floating-point elements from memory
@ -1725,7 +1729,8 @@ pub unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d) {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256 {
maskloadps256(mem_addr as *const i8, mask.as_i32x8())
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm256_setzero_ps())
}
/// Stores packed single-precision (32-bit) floating-point elements from `a`
@ -1737,7 +1742,8 @@ pub unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256) {
maskstoreps256(mem_addr as *mut i8, mask.as_i32x8(), a);
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
/// Loads packed single-precision (32-bit) floating-point elements from memory
@ -1750,7 +1756,8 @@ pub unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256)
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
maskloadps(mem_addr as *const i8, mask.as_i32x4())
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, _mm_setzero_ps())
}
/// Stores packed single-precision (32-bit) floating-point elements from `a`
@ -1762,7 +1769,8 @@ pub unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128 {
#[cfg_attr(test, assert_instr(vmaskmovps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128) {
maskstoreps(mem_addr as *mut i8, mask.as_i32x4(), a);
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a)
}
/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
@ -3147,22 +3155,6 @@ unsafe extern "C" {
fn vpermilpd256(a: __m256d, b: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.vpermilvar.pd"]
fn vpermilpd(a: __m128d, b: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskload.pd.256"]
fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
#[link_name = "llvm.x86.avx.maskstore.pd.256"]
fn maskstorepd256(mem_addr: *mut i8, mask: i64x4, a: __m256d);
#[link_name = "llvm.x86.avx.maskload.pd"]
fn maskloadpd(mem_addr: *const i8, mask: i64x2) -> __m128d;
#[link_name = "llvm.x86.avx.maskstore.pd"]
fn maskstorepd(mem_addr: *mut i8, mask: i64x2, a: __m128d);
#[link_name = "llvm.x86.avx.maskload.ps.256"]
fn maskloadps256(mem_addr: *const i8, mask: i32x8) -> __m256;
#[link_name = "llvm.x86.avx.maskstore.ps.256"]
fn maskstoreps256(mem_addr: *mut i8, mask: i32x8, a: __m256);
#[link_name = "llvm.x86.avx.maskload.ps"]
fn maskloadps(mem_addr: *const i8, mask: i32x4) -> __m128;
#[link_name = "llvm.x86.avx.maskstore.ps"]
fn maskstoreps(mem_addr: *mut i8, mask: i32x4, a: __m128);
#[link_name = "llvm.x86.avx.ldu.dq.256"]
fn vlddqu(mem_addr: *const i8) -> i8x32;
#[link_name = "llvm.x86.avx.rcp.ps.256"]
@ -3928,28 +3920,43 @@ mod tests {
#[simd_test(enable = "avx")]
unsafe fn test_mm256_permute2f128_ps() {
let a = _mm256_setr_ps(1., 2., 3., 4., 1., 2., 3., 4.);
let b = _mm256_setr_ps(5., 6., 7., 8., 5., 6., 7., 8.);
let r = _mm256_permute2f128_ps::<0x13>(a, b);
let e = _mm256_setr_ps(5., 6., 7., 8., 1., 2., 3., 4.);
let a = _mm256_setr_ps(11., 12., 13., 14., 15., 16., 17., 18.);
let b = _mm256_setr_ps(21., 22., 23., 24., 25., 26., 27., 28.);
let r = _mm256_permute2f128_ps::<0b0001_0011>(a, b);
let e = _mm256_setr_ps(25., 26., 27., 28., 15., 16., 17., 18.);
assert_eq_m256(r, e);
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
let r = _mm256_permute2f128_ps::<0b1001_1011>(a, b);
let z = _mm256_setr_ps(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);
assert_eq_m256(r, z);
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_permute2f128_pd() {
let a = _mm256_setr_pd(1., 2., 3., 4.);
let b = _mm256_setr_pd(5., 6., 7., 8.);
let r = _mm256_permute2f128_pd::<0x31>(a, b);
let r = _mm256_permute2f128_pd::<0b0011_0001>(a, b);
let e = _mm256_setr_pd(3., 4., 7., 8.);
assert_eq_m256d(r, e);
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
let r = _mm256_permute2f128_pd::<0b1011_1001>(a, b);
let e = _mm256_setr_pd(0.0, 0.0, 0.0, 0.0);
assert_eq_m256d(r, e);
}
#[simd_test(enable = "avx")]
unsafe fn test_mm256_permute2f128_si256() {
let a = _mm256_setr_epi32(1, 2, 3, 4, 1, 2, 3, 4);
let b = _mm256_setr_epi32(5, 6, 7, 8, 5, 6, 7, 8);
let r = _mm256_permute2f128_si256::<0x20>(a, b);
let e = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let a = _mm256_setr_epi32(11, 12, 13, 14, 15, 16, 17, 18);
let b = _mm256_setr_epi32(21, 22, 23, 24, 25, 26, 27, 28);
let r = _mm256_permute2f128_si256::<0b0010_0000>(a, b);
let e = _mm256_setr_epi32(11, 12, 13, 14, 21, 22, 23, 24);
assert_eq_m256i(r, e);
// Setting bits 3 or 7 (zero-indexed) zeroes the corresponding field.
let r = _mm256_permute2f128_si256::<0b1010_1000>(a, b);
let e = _mm256_setr_epi32(0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}

View file

@ -1773,7 +1773,7 @@ pub fn _mm256_madd_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_u8x32())) }
unsafe { transmute(pmaddubsw(a.as_u8x32(), b.as_i8x32())) }
}
/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@ -1786,7 +1786,8 @@ pub fn _mm256_maddubs_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i {
transmute(maskloadd(mem_addr as *const i8, mask.as_i32x4()))
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x4::ZERO).as_m128i()
}
/// Loads packed 32-bit integers from memory pointed by `mem_addr` using `mask`
@ -1799,7 +1800,8 @@ pub unsafe fn _mm_maskload_epi32(mem_addr: *const i32, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m256i {
transmute(maskloadd256(mem_addr as *const i8, mask.as_i32x8()))
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i32x8::ZERO).as_m256i()
}
/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@ -1812,7 +1814,8 @@ pub unsafe fn _mm256_maskload_epi32(mem_addr: *const i32, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i {
transmute(maskloadq(mem_addr as *const i8, mask.as_i64x2()))
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x2::ZERO).as_m128i()
}
/// Loads packed 64-bit integers from memory pointed by `mem_addr` using `mask`
@ -1825,7 +1828,8 @@ pub unsafe fn _mm_maskload_epi64(mem_addr: *const i64, mask: __m128i) -> __m128i
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m256i {
transmute(maskloadq256(mem_addr as *const i8, mask.as_i64x4()))
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, i64x4::ZERO).as_m256i()
}
/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
@ -1838,7 +1842,8 @@ pub unsafe fn _mm256_maskload_epi64(mem_addr: *const i64, mask: __m256i) -> __m2
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i) {
maskstored(mem_addr as *mut i8, mask.as_i32x4(), a.as_i32x4())
let mask = simd_shr(mask.as_i32x4(), i32x4::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4())
}
/// Stores packed 32-bit integers from `a` into memory pointed by `mem_addr`
@ -1851,7 +1856,8 @@ pub unsafe fn _mm_maskstore_epi32(mem_addr: *mut i32, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m256i) {
maskstored256(mem_addr as *mut i8, mask.as_i32x8(), a.as_i32x8())
let mask = simd_shr(mask.as_i32x8(), i32x8::splat(31));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8())
}
/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
@ -1864,7 +1870,8 @@ pub unsafe fn _mm256_maskstore_epi32(mem_addr: *mut i32, mask: __m256i, a: __m25
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i) {
maskstoreq(mem_addr as *mut i8, mask.as_i64x2(), a.as_i64x2())
let mask = simd_shr(mask.as_i64x2(), i64x2::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2())
}
/// Stores packed 64-bit integers from `a` into memory pointed by `mem_addr`
@ -1877,7 +1884,8 @@ pub unsafe fn _mm_maskstore_epi64(mem_addr: *mut i64, mask: __m128i, a: __m128i)
#[cfg_attr(test, assert_instr(vpmaskmovq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m256i) {
maskstoreq256(mem_addr as *mut i8, mask.as_i64x4(), a.as_i64x4())
let mask = simd_shr(mask.as_i64x4(), i64x4::splat(63));
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4())
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@ -2778,7 +2786,12 @@ pub fn _mm256_bslli_epi128<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(psllvd(a.as_i32x4(), count.as_i32x4())) }
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x4::ZERO);
simd_select(no_overflow, simd_shl(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
}
}
/// Shifts packed 32-bit integers in `a` left by the amount
@ -2791,7 +2804,12 @@ pub fn _mm_sllv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsllvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(psllvd256(a.as_i32x8(), count.as_i32x8())) }
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x8::ZERO);
simd_select(no_overflow, simd_shl(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
}
}
/// Shifts packed 64-bit integers in `a` left by the amount
@ -2804,7 +2822,12 @@ pub fn _mm256_sllv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(psllvq(a.as_i64x2(), count.as_i64x2())) }
unsafe {
let count = count.as_u64x2();
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
let count = simd_select(no_overflow, count, u64x2::ZERO);
simd_select(no_overflow, simd_shl(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
}
}
/// Shifts packed 64-bit integers in `a` left by the amount
@ -2817,7 +2840,12 @@ pub fn _mm_sllv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsllvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_sllv_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(psllvq256(a.as_i64x4(), count.as_i64x4())) }
unsafe {
let count = count.as_u64x4();
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
let count = simd_select(no_overflow, count, u64x4::ZERO);
simd_select(no_overflow, simd_shl(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
}
}
/// Shifts packed 16-bit integers in `a` right by `count` while
@ -2881,7 +2909,12 @@ pub fn _mm256_srai_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(psravd(a.as_i32x4(), count.as_i32x4())) }
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
let count = simd_select(no_overflow, transmute(count), i32x4::splat(31));
simd_shr(a.as_i32x4(), count).as_m128i()
}
}
/// Shifts packed 32-bit integers in `a` right by the amount specified by the
@ -2893,7 +2926,12 @@ pub fn _mm_srav_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsravd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srav_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(psravd256(a.as_i32x8(), count.as_i32x8())) }
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
let count = simd_select(no_overflow, transmute(count), i32x8::splat(31));
simd_shr(a.as_i32x8(), count).as_m256i()
}
}
/// Shifts 128-bit lanes in `a` right by `imm8` bytes while shifting in zeros.
@ -3076,7 +3114,12 @@ pub fn _mm256_srli_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(psrlvd(a.as_i32x4(), count.as_i32x4())) }
unsafe {
let count = count.as_u32x4();
let no_overflow: u32x4 = simd_lt(count, u32x4::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x4::ZERO);
simd_select(no_overflow, simd_shr(a.as_u32x4(), count), u32x4::ZERO).as_m128i()
}
}
/// Shifts packed 32-bit integers in `a` right by the amount specified by
@ -3088,7 +3131,12 @@ pub fn _mm_srlv_epi32(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsrlvd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(psrlvd256(a.as_i32x8(), count.as_i32x8())) }
unsafe {
let count = count.as_u32x8();
let no_overflow: u32x8 = simd_lt(count, u32x8::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x8::ZERO);
simd_select(no_overflow, simd_shr(a.as_u32x8(), count), u32x8::ZERO).as_m256i()
}
}
/// Shifts packed 64-bit integers in `a` right by the amount specified by
@ -3100,7 +3148,12 @@ pub fn _mm256_srlv_epi32(a: __m256i, count: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(psrlvq(a.as_i64x2(), count.as_i64x2())) }
unsafe {
let count = count.as_u64x2();
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
let count = simd_select(no_overflow, count, u64x2::ZERO);
simd_select(no_overflow, simd_shr(a.as_u64x2(), count), u64x2::ZERO).as_m128i()
}
}
/// Shifts packed 64-bit integers in `a` right by the amount specified by
@ -3112,7 +3165,12 @@ pub fn _mm_srlv_epi64(a: __m128i, count: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(vpsrlvq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u64x4();
        // Shift amounts >= 64 produce 0; clamp them to 0 to keep
        // `simd_shr` in range, then zero those lanes in the output.
        let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
        let count = simd_select(no_overflow, count, u64x4::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u64x4(), count), u64x4::ZERO).as_m256i()
    }
}
/// Load 256-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
@ -3644,23 +3702,7 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx2.phsub.sw"]
fn phsubsw(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx2.pmadd.ub.sw"]
fn pmaddubsw(a: u8x32, b: u8x32) -> i16x16;
#[link_name = "llvm.x86.avx2.maskload.d"]
fn maskloadd(mem_addr: *const i8, mask: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.maskload.d.256"]
fn maskloadd256(mem_addr: *const i8, mask: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.maskload.q"]
fn maskloadq(mem_addr: *const i8, mask: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.maskload.q.256"]
fn maskloadq256(mem_addr: *const i8, mask: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.maskstore.d"]
fn maskstored(mem_addr: *mut i8, mask: i32x4, a: i32x4);
#[link_name = "llvm.x86.avx2.maskstore.d.256"]
fn maskstored256(mem_addr: *mut i8, mask: i32x8, a: i32x8);
#[link_name = "llvm.x86.avx2.maskstore.q"]
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
fn pmaddubsw(a: u8x32, b: i8x32) -> i16x16;
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16;
#[link_name = "llvm.x86.avx2.pmul.hr.sw"]
@ -3687,36 +3729,16 @@ unsafe extern "C" {
fn pslld(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psll.q"]
fn psllq(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx2.psllv.d"]
fn psllvd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psllv.d.256"]
fn psllvd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psllv.q"]
fn psllvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psllv.q.256"]
fn psllvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.psra.w"]
fn psraw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psra.d"]
fn psrad(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrav.d"]
fn psravd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psrav.d.256"]
fn psravd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.w"]
fn psrlw(a: i16x16, count: i16x8) -> i16x16;
#[link_name = "llvm.x86.avx2.psrl.d"]
fn psrld(a: i32x8, count: i32x4) -> i32x8;
#[link_name = "llvm.x86.avx2.psrl.q"]
fn psrlq(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx2.psrlv.d"]
fn psrlvd(a: i32x4, count: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx2.psrlv.d.256"]
fn psrlvd256(a: i32x8, count: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx2.psrlv.q"]
fn psrlvq(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx2.psrlv.q.256"]
fn psrlvq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx2.pshuf.b"]
fn pshufb(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.permd"]
@ -5727,7 +5749,7 @@ mod tests {
assert_eq_m256d(r, _mm256_setr_pd(0.0, 16.0, 64.0, 256.0));
}
#[simd_test(enable = "avx")]
#[simd_test(enable = "avx2")]
unsafe fn test_mm256_extract_epi8() {
#[rustfmt::skip]
let a = _mm256_setr_epi8(

View file

@ -5609,7 +5609,8 @@ pub unsafe fn _mm_storeu_epi8(mem_addr: *mut i8, a: __m128i) {
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *const i16) -> __m512i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; lanes with a clear mask bit are copied from `src` and
    // the corresponding memory is never read.
    let mask = simd_select_bitmask(k, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x32()).as_m512i()
}
/// Load packed 16-bit integers from memory into dst using zeromask k
@ -5635,7 +5636,8 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *const i8) -> __m512i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; masked-off lanes come from `src` and their memory is
    // never read.
    let mask = simd_select_bitmask(k, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x64()).as_m512i()
}
/// Load packed 8-bit integers from memory into dst using zeromask k
@ -5661,7 +5663,8 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *const i16) -> __m256i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; masked-off lanes come from `src` and their memory is
    // never read.
    let mask = simd_select_bitmask(k, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x16()).as_m256i()
}
/// Load packed 16-bit integers from memory into dst using zeromask k
@ -5687,7 +5690,8 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *const i8) -> __m256i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; masked-off lanes come from `src` and their memory is
    // never read.
    let mask = simd_select_bitmask(k, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x32()).as_m256i()
}
/// Load packed 8-bit integers from memory into dst using zeromask k
@ -5713,7 +5717,8 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i16) -> __m128i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; masked-off lanes come from `src` and their memory is
    // never read.
    let mask = simd_select_bitmask(k, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i16x8()).as_m128i()
}
/// Load packed 16-bit integers from memory into dst using zeromask k
@ -5739,7 +5744,8 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i8) -> __m128i {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked load; masked-off lanes come from `src` and their memory is
    // never read.
    let mask = simd_select_bitmask(k, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i8x16()).as_m128i()
}
/// Load packed 8-bit integers from memory into dst using zeromask k
@ -5764,7 +5770,8 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: __m512i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i16x32::splat(!0), i16x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x32());
}
/// Store packed 8-bit integers from a into memory using writemask k.
@ -5776,7 +5783,8 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m512i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i8x64::splat(!0), i8x64::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x64());
}
/// Store packed 16-bit integers from a into memory using writemask k.
@ -5788,7 +5796,8 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: __m256i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i16x16::splat(!0), i16x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x16());
}
/// Store packed 8-bit integers from a into memory using writemask k.
@ -5800,7 +5809,8 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m256i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i8x32::splat(!0), i8x32::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x32());
}
/// Store packed 16-bit integers from a into memory using writemask k.
@ -5812,7 +5822,8 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
#[cfg_attr(test, assert_instr(vmovdqu16))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m128i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i16x8::splat(!0), i16x8::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i16x8());
}
/// Store packed 8-bit integers from a into memory using writemask k.
@ -5824,7 +5835,8 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
#[cfg_attr(test, assert_instr(vmovdqu8))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128i) {
    // Expand the bitmask into a full-width lane mask, then do an unaligned
    // masked store; lanes with a clear mask bit leave memory untouched.
    let mask = simd_select_bitmask(mask, i8x16::splat(!0), i8x16::ZERO);
    simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i8x16());
}
/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst.
@ -5943,7 +5955,7 @@ pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaddubsw))]
pub fn _mm512_maddubs_epi16(a: __m512i, b: __m512i) -> __m512i {
    // `a` is treated as unsigned 8-bit lanes and `b` as signed 8-bit lanes,
    // matching the corrected `vpmaddubsw` declaration (u8x64, i8x64).
    unsafe { transmute(vpmaddubsw(a.as_u8x64(), b.as_i8x64())) }
}
/// Multiply packed unsigned 8-bit integers in a by packed signed 8-bit integers in b, producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -6852,7 +6864,12 @@ pub fn _mm_maskz_slli_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm512_sllv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shl` in range, then zero those lanes in the output.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -6891,7 +6908,12 @@ pub fn _mm512_maskz_sllv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm256_sllv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shl` in range, then zero those lanes in the output.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -6930,7 +6952,12 @@ pub fn _mm256_maskz_sllv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvw))]
pub fn _mm_sllv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shl` in range, then zero those lanes in the output.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shl(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
/// Shift packed 16-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7188,7 +7215,12 @@ pub fn _mm_maskz_srli_epi16<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm512_srlv_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shr` in range, then zero those lanes in the output.
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x32::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x32(), count), u16x32::ZERO).as_m512i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7227,7 +7259,12 @@ pub fn _mm512_maskz_srlv_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm256_srlv_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shr` in range, then zero those lanes in the output.
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x16::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x16(), count), u16x16::ZERO).as_m256i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7266,7 +7303,12 @@ pub fn _mm256_maskz_srlv_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvw))]
pub fn _mm_srlv_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        // Shift amounts >= 16 produce 0; clamp them to 0 to keep
        // `simd_shr` in range, then zero those lanes in the output.
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        let count = simd_select(no_overflow, count, u16x8::ZERO);
        simd_select(no_overflow, simd_shr(a.as_u16x8(), count), u16x8::ZERO).as_m128i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7511,7 +7553,12 @@ pub fn _mm_maskz_srai_epi16<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm512_srav_epi16(a: __m512i, count: __m512i) -> __m512i {
    unsafe {
        let count = count.as_u16x32();
        let no_overflow: u16x32 = simd_lt(count, u16x32::splat(u16::BITS as u16));
        // Arithmetic right shift saturates: a count >= 16 behaves like a
        // shift by 15 (every bit of the lane becomes the sign bit).
        let count = simd_select(no_overflow, transmute(count), i16x32::splat(15));
        simd_shr(a.as_i16x32(), count).as_m512i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7550,7 +7597,12 @@ pub fn _mm512_maskz_srav_epi16(k: __mmask32, a: __m512i, count: __m512i) -> __m5
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm256_srav_epi16(a: __m256i, count: __m256i) -> __m256i {
    unsafe {
        let count = count.as_u16x16();
        let no_overflow: u16x16 = simd_lt(count, u16x16::splat(u16::BITS as u16));
        // Arithmetic right shift saturates: a count >= 16 behaves like a
        // shift by 15 (every bit of the lane becomes the sign bit).
        let count = simd_select(no_overflow, transmute(count), i16x16::splat(15));
        simd_shr(a.as_i16x16(), count).as_m256i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -7589,7 +7641,12 @@ pub fn _mm256_maskz_srav_epi16(k: __mmask16, a: __m256i, count: __m256i) -> __m2
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravw))]
pub fn _mm_srav_epi16(a: __m128i, count: __m128i) -> __m128i {
    unsafe {
        let count = count.as_u16x8();
        let no_overflow: u16x8 = simd_lt(count, u16x8::splat(u16::BITS as u16));
        // Arithmetic right shift saturates: a count >= 16 behaves like a
        // shift by 15 (every bit of the lane becomes the sign bit).
        let count = simd_select(no_overflow, transmute(count), i16x8::splat(15));
        simd_shr(a.as_i16x8(), count).as_m128i()
    }
}
/// Shift packed 16-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -11631,7 +11688,7 @@ unsafe extern "C" {
fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
fn vpmaddubsw(a: i8x64, b: i8x64) -> i16x32;
fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
#[link_name = "llvm.x86.avx512.packssdw.512"]
fn vpackssdw(a: i32x16, b: i32x16) -> i16x32;
@ -11645,33 +11702,12 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.psll.w.512"]
fn vpsllw(a: i16x32, count: i16x8) -> i16x32;
#[link_name = "llvm.x86.avx512.psllv.w.512"]
fn vpsllvw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psllv.w.256"]
fn vpsllvw256(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psllv.w.128"]
fn vpsllvw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.avx512.psrl.w.512"]
fn vpsrlw(a: i16x32, count: i16x8) -> i16x32;
#[link_name = "llvm.x86.avx512.psrlv.w.512"]
fn vpsrlvw(a: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psrlv.w.256"]
fn vpsrlvw256(a: i16x16, b: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psrlv.w.128"]
fn vpsrlvw128(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.avx512.psra.w.512"]
fn vpsraw(a: i16x32, count: i16x8) -> i16x32;
#[link_name = "llvm.x86.avx512.psrav.w.512"]
fn vpsravw(a: i16x32, count: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.psrav.w.256"]
fn vpsravw256(a: i16x16, count: i16x16) -> i16x16;
#[link_name = "llvm.x86.avx512.psrav.w.128"]
fn vpsravw128(a: i16x8, count: i16x8) -> i16x8;
#[link_name = "llvm.x86.avx512.vpermi2var.hi.512"]
fn vpermi2w(a: i16x32, idx: i16x32, b: i16x32) -> i16x32;
#[link_name = "llvm.x86.avx512.vpermi2var.hi.256"]
@ -11733,33 +11769,6 @@ unsafe extern "C" {
fn vpmovuswbmem256(mem_addr: *mut i8, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovus.wb.mem.128"]
fn vpmovuswbmem128(mem_addr: *mut i8, a: i16x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.loadu.b.128"]
fn loaddqu8_128(mem_addr: *const i8, a: i8x16, mask: u16) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.loadu.w.128"]
fn loaddqu16_128(mem_addr: *const i16, a: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.loadu.b.256"]
fn loaddqu8_256(mem_addr: *const i8, a: i8x32, mask: u32) -> i8x32;
#[link_name = "llvm.x86.avx512.mask.loadu.w.256"]
fn loaddqu16_256(mem_addr: *const i16, a: i16x16, mask: u16) -> i16x16;
#[link_name = "llvm.x86.avx512.mask.loadu.b.512"]
fn loaddqu8_512(mem_addr: *const i8, a: i8x64, mask: u64) -> i8x64;
#[link_name = "llvm.x86.avx512.mask.loadu.w.512"]
fn loaddqu16_512(mem_addr: *const i16, a: i16x32, mask: u32) -> i16x32;
#[link_name = "llvm.x86.avx512.mask.storeu.b.128"]
fn storedqu8_128(mem_addr: *mut i8, a: i8x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.storeu.w.128"]
fn storedqu16_128(mem_addr: *mut i16, a: i16x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.b.256"]
fn storedqu8_256(mem_addr: *mut i8, a: i8x32, mask: u32);
#[link_name = "llvm.x86.avx512.mask.storeu.w.256"]
fn storedqu16_256(mem_addr: *mut i16, a: i16x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.storeu.b.512"]
fn storedqu8_512(mem_addr: *mut i8, a: i8x64, mask: u64);
#[link_name = "llvm.x86.avx512.mask.storeu.w.512"]
fn storedqu16_512(mem_addr: *mut i16, a: i16x32, mask: u32);
}
#[cfg(test)]
@ -13326,7 +13335,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_max_epu16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13343,7 +13352,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_max_epu16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13360,7 +13369,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_max_epu16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13371,7 +13380,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_max_epu16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13382,7 +13391,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_max_epu16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13393,7 +13402,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_max_epu16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13425,7 +13434,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_max_epu8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13453,7 +13462,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_max_epu8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13480,7 +13489,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_max_epu8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13497,7 +13506,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_max_epu8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13514,7 +13523,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_max_epu8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13525,7 +13534,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_max_epu8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13551,7 +13560,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_max_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13568,7 +13577,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_max_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13585,7 +13594,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_max_epi16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13596,7 +13605,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_max_epi16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13607,7 +13616,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_max_epi16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13618,7 +13627,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_max_epi16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13650,7 +13659,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_max_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13678,7 +13687,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_max_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13705,7 +13714,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_max_epi8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13722,7 +13731,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_max_epi8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13739,7 +13748,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_max_epi8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13750,7 +13759,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_max_epi8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13776,7 +13785,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_min_epu16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13793,7 +13802,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_min_epu16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13810,7 +13819,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_min_epu16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13821,7 +13830,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_min_epu16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13832,7 +13841,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_min_epu16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13843,7 +13852,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_min_epu16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -13875,7 +13884,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_min_epu8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13903,7 +13912,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_min_epu8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13930,7 +13939,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_min_epu8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13947,7 +13956,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_min_epu8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -13964,7 +13973,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_min_epu8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -13975,7 +13984,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_min_epu8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -14001,7 +14010,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_min_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14018,7 +14027,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_min_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14035,7 +14044,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_min_epi16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -14046,7 +14055,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_min_epi16() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -14057,7 +14066,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_min_epi16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -14068,7 +14077,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_min_epi16() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0);
@ -14100,7 +14109,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_min_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14128,7 +14137,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_min_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14155,7 +14164,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_min_epi8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14172,7 +14181,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_min_epi8() {
#[rustfmt::skip]
let a = _mm256_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
@ -14189,7 +14198,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_min_epi8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -14200,7 +14209,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_min_epi8() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
@ -16317,7 +16326,7 @@ mod tests {
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_loadu_epi16() {
let src = _mm512_set1_epi16(42);
let a = &[
@ -16335,7 +16344,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_loadu_epi16() {
let a = &[
1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -16352,7 +16361,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_storeu_epi16() {
let mut r = [42_i16; 32];
let a = &[
@ -16370,7 +16379,7 @@ mod tests {
assert_eq_m512i(_mm512_loadu_epi16(r.as_ptr()), e);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_loadu_epi8() {
let src = _mm512_set1_epi8(42);
let a = &[
@ -16390,7 +16399,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_maskz_loadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -16409,7 +16418,7 @@ mod tests {
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw")]
#[simd_test(enable = "avx512bw")]
unsafe fn test_mm512_mask_storeu_epi8() {
let mut r = [42_i8; 64];
let a = &[
@ -16429,7 +16438,7 @@ mod tests {
assert_eq_m512i(_mm512_loadu_epi8(r.as_ptr()), e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_loadu_epi16() {
let src = _mm256_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
@ -16443,7 +16452,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_loadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
@ -16454,7 +16463,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_storeu_epi16() {
let mut r = [42_i16; 16];
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
@ -16468,7 +16477,7 @@ mod tests {
assert_eq_m256i(_mm256_loadu_epi16(r.as_ptr()), e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_loadu_epi8() {
let src = _mm256_set1_epi8(42);
let a = &[
@ -16486,7 +16495,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_loadu_epi8() {
let a = &[
1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
@ -16503,7 +16512,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm256_mask_storeu_epi8() {
let mut r = [42_i8; 32];
let a = &[
@ -16521,7 +16530,7 @@ mod tests {
assert_eq_m256i(_mm256_loadu_epi8(r.as_ptr()), e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_loadu_epi16() {
let src = _mm_set1_epi16(42);
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
@ -16533,7 +16542,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_loadu_epi16() {
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
let p = a.as_ptr();
@ -16544,7 +16553,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_storeu_epi16() {
let mut r = [42_i16; 8];
let a = &[1_i16, 2, 3, 4, 5, 6, 7, 8];
@ -16556,7 +16565,7 @@ mod tests {
assert_eq_m128i(_mm_loadu_epi16(r.as_ptr()), e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_loadu_epi8() {
let src = _mm_set1_epi8(42);
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
@ -16570,7 +16579,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_maskz_loadu_epi8() {
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let p = a.as_ptr();
@ -16581,7 +16590,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512bw,avx512vl")]
#[simd_test(enable = "avx512bw,avx512vl")]
unsafe fn test_mm_mask_storeu_epi8() {
let mut r = [42_i8; 16];
let a = &[1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];

View file

@ -20940,7 +20940,12 @@ pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
unsafe {
let count = count.as_u32x16();
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
let count = simd_select(no_overflow, transmute(count), i32x16::splat(31));
simd_shr(a.as_i32x16(), count).as_m512i()
}
}
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21035,7 +21040,12 @@ pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
unsafe {
let count = count.as_u64x8();
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
let count = simd_select(no_overflow, transmute(count), i64x8::splat(63));
simd_shr(a.as_i64x8(), count).as_m512i()
}
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21074,7 +21084,12 @@ pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m51
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
unsafe {
let count = count.as_u64x4();
let no_overflow: u64x4 = simd_lt(count, u64x4::splat(u64::BITS as u64));
let count = simd_select(no_overflow, transmute(count), i64x4::splat(63));
simd_shr(a.as_i64x4(), count).as_m256i()
}
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21113,7 +21128,12 @@ pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m25
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
unsafe {
let count = count.as_u64x2();
let no_overflow: u64x2 = simd_lt(count, u64x2::splat(u64::BITS as u64));
let count = simd_select(no_overflow, transmute(count), i64x2::splat(63));
simd_shr(a.as_i64x2(), count).as_m128i()
}
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21692,7 +21712,12 @@ pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
unsafe {
let count = count.as_u32x16();
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x16::ZERO);
simd_select(no_overflow, simd_shl(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
}
}
/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21787,7 +21812,12 @@ pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
unsafe {
let count = count.as_u32x16();
let no_overflow: u32x16 = simd_lt(count, u32x16::splat(u32::BITS));
let count = simd_select(no_overflow, count, u32x16::ZERO);
simd_select(no_overflow, simd_shr(a.as_u32x16(), count), u32x16::ZERO).as_m512i()
}
}
/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21882,7 +21912,12 @@ pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
unsafe {
let count = count.as_u64x8();
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
let count = simd_select(no_overflow, count, u64x8::ZERO);
simd_select(no_overflow, simd_shl(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
}
}
/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -21977,7 +22012,12 @@ pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
unsafe {
let count = count.as_u64x8();
let no_overflow: u64x8 = simd_lt(count, u64x8::splat(u64::BITS as u64));
let count = simd_select(no_overflow, count, u64x8::ZERO);
simd_select(no_overflow, simd_shr(a.as_u64x8(), count), u64x8::ZERO).as_m512i()
}
}
/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@ -34715,7 +34755,8 @@ pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x16()).as_m512i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -34741,7 +34782,8 @@ pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x8()).as_m512i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -34767,7 +34809,8 @@ pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
transmute(loadups_512(mem_addr, src.as_f32x16(), k))
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x16()).as_m512()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -34793,7 +34836,8 @@ pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m51
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x8()).as_m512d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -34819,7 +34863,8 @@ pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x8()).as_m256i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -34845,7 +34890,8 @@ pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x4()).as_m256i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -34871,7 +34917,8 @@ pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
transmute(loadups_256(mem_addr, src.as_f32x8(), k))
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x8()).as_m256()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -34897,7 +34944,8 @@ pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x4()).as_m256d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -34923,7 +34971,8 @@ pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i32x4()).as_m128i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -34949,7 +34998,8 @@ pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_i64x2()).as_m128i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -34975,7 +35025,8 @@ pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
transmute(loadups_128(mem_addr, src.as_f32x4(), k))
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f32x4()).as_m128()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -35001,7 +35052,8 @@ pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
simd_masked_load!(SimdAlign::Unaligned, mask, mem_addr, src.as_f64x2()).as_m128d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -35027,7 +35079,8 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x16()).as_m512i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -35053,7 +35106,8 @@ pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x8()).as_m512i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -35079,7 +35133,8 @@ pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m5
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
let mask = simd_select_bitmask(k, i32x16::splat(!0), i32x16::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x16()).as_m512()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -35105,7 +35160,8 @@ pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
let mask = simd_select_bitmask(k, i64x8::splat(!0), i64x8::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x8()).as_m512d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -35131,7 +35187,8 @@ pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x8()).as_m256i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -35157,7 +35214,8 @@ pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m2
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x4()).as_m256i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -35183,7 +35241,8 @@ pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m2
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
let mask = simd_select_bitmask(k, i32x8::splat(!0), i32x8::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x8()).as_m256()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -35209,7 +35268,8 @@ pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
let mask = simd_select_bitmask(k, i64x4::splat(!0), i64x4::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x4()).as_m256d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -35235,7 +35295,8 @@ pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i32x4()).as_m128i()
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@ -35261,7 +35322,8 @@ pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_i64x2()).as_m128i()
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@ -35287,7 +35349,8 @@ pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
let mask = simd_select_bitmask(k, i32x4::splat(!0), i32x4::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f32x4()).as_m128()
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@ -35313,7 +35376,8 @@ pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
let mask = simd_select_bitmask(k, i64x2::splat(!0), i64x2::ZERO);
simd_masked_load!(SimdAlign::Vector, mask, mem_addr, src.as_f64x2()).as_m128d()
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@ -35426,7 +35490,8 @@ pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
storedqu32_512(mem_addr, a.as_i32x16(), mask)
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x16());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35438,7 +35503,8 @@ pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: _
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
storedqu64_512(mem_addr, a.as_i64x8(), mask)
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x8());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35450,7 +35516,8 @@ pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
storeups_512(mem_addr, a.as_f32x16(), mask)
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x16());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35462,7 +35529,8 @@ pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m5
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
storeupd_512(mem_addr, a.as_f64x8(), mask)
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x8());
}
/// Store packed 32-bit integers from a into memory using writemask k.
@ -35474,7 +35542,8 @@ pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m51
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
storedqu32_256(mem_addr, a.as_i32x8(), mask)
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x8());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35486,7 +35555,8 @@ pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
storedqu64_256(mem_addr, a.as_i64x4(), mask)
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x4());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35498,7 +35568,8 @@ pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
storeups_256(mem_addr, a.as_f32x8(), mask)
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x8());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35510,7 +35581,8 @@ pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m25
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
storeupd_256(mem_addr, a.as_f64x4(), mask)
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x4());
}
/// Store packed 32-bit integers from a into memory using writemask k.
@ -35522,7 +35594,8 @@ pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m25
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
storedqu32_128(mem_addr, a.as_i32x4(), mask)
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i32x4());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35534,7 +35607,8 @@ pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m12
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
storedqu64_128(mem_addr, a.as_i64x2(), mask)
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_i64x2());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35546,7 +35620,8 @@ pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m12
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
storeups_128(mem_addr, a.as_f32x4(), mask)
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f32x4());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35558,7 +35633,8 @@ pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128)
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
storeupd_128(mem_addr, a.as_f64x2(), mask)
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
simd_masked_store!(SimdAlign::Unaligned, mask, mem_addr, a.as_f64x2());
}
/// Store packed 32-bit integers from a into memory using writemask k.
@ -35570,7 +35646,8 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
storedqa32_512(mem_addr, a.as_i32x16(), mask)
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x16());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35582,7 +35659,8 @@ pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
storedqa64_512(mem_addr, a.as_i64x8(), mask)
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x8());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35594,7 +35672,8 @@ pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
storeaps_512(mem_addr, a.as_f32x16(), mask)
let mask = simd_select_bitmask(mask, i32x16::splat(!0), i32x16::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x16());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35606,7 +35685,8 @@ pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m51
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
storeapd_512(mem_addr, a.as_f64x8(), mask)
let mask = simd_select_bitmask(mask, i64x8::splat(!0), i64x8::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x8());
}
/// Store packed 32-bit integers from a into memory using writemask k.
@ -35618,7 +35698,8 @@ pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
storedqa32_256(mem_addr, a.as_i32x8(), mask)
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x8());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35630,7 +35711,8 @@ pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
storedqa64_256(mem_addr, a.as_i64x4(), mask)
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x4());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35642,7 +35724,8 @@ pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
storeaps_256(mem_addr, a.as_f32x8(), mask)
let mask = simd_select_bitmask(mask, i32x8::splat(!0), i32x8::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x8());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35654,7 +35737,8 @@ pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
storeapd_256(mem_addr, a.as_f64x4(), mask)
let mask = simd_select_bitmask(mask, i64x4::splat(!0), i64x4::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x4());
}
/// Store packed 32-bit integers from a into memory using writemask k.
@ -35666,7 +35750,8 @@ pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256
#[cfg_attr(test, assert_instr(vmovdqa32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
storedqa32_128(mem_addr, a.as_i32x4(), mask)
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i32x4());
}
/// Store packed 64-bit integers from a into memory using writemask k.
@ -35678,7 +35763,8 @@ pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128
#[cfg_attr(test, assert_instr(vmovdqa64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
storedqa64_128(mem_addr, a.as_i64x2(), mask)
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_i64x2());
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@ -35690,7 +35776,8 @@ pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128
#[cfg_attr(test, assert_instr(vmovaps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
storeaps_128(mem_addr, a.as_f32x4(), mask)
let mask = simd_select_bitmask(mask, i32x4::splat(!0), i32x4::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f32x4());
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@ -35702,7 +35789,8 @@ pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
#[cfg_attr(test, assert_instr(vmovapd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
storeapd_128(mem_addr, a.as_f64x2(), mask)
let mask = simd_select_bitmask(mask, i64x2::splat(!0), i64x2::ZERO);
simd_masked_store!(SimdAlign::Vector, mask, mem_addr, a.as_f64x2());
}
/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
@ -42833,15 +42921,6 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.psllv.d.512"]
fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrlv.d.512"]
fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psllv.q.512"]
fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psrlv.q.512"]
fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psll.d.512"]
fn vpslld(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psrl.d.512"]
@ -42861,16 +42940,6 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.psra.q.128"]
fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.psrav.d.512"]
fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrav.q.512"]
fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psrav.q.256"]
fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.psrav.q.128"]
fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
@ -43109,106 +43178,6 @@ unsafe extern "C" {
#[link_name = "llvm.x86.avx512.vcomi.sd"]
fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
#[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.load.d.128"]
fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.load.q.128"]
fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.load.ps.128"]
fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.load.pd.128"]
fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.load.d.256"]
fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.load.q.256"]
fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.load.ps.256"]
fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.load.pd.256"]
fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.load.d.512"]
fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.load.q.512"]
fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.load.ps.512"]
fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.load.pd.512"]
fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.d.128"]
fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.q.128"]
fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.ps.128"]
fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.pd.128"]
fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.d.256"]
fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.q.256"]
fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.ps.256"]
fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.pd.256"]
fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.d.512"]
fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.store.q.512"]
fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.store.ps.512"]
fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.store.pd.512"]
fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
@ -46240,11 +46209,25 @@ mod tests {
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ternarylogic_epi32() {
let a = _mm512_set1_epi32(1 << 2);
let b = _mm512_set1_epi32(1 << 1);
let c = _mm512_set1_epi32(1 << 0);
let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
let e = _mm512_set1_epi32(0);
use core::intrinsics::simd::simd_xor;
let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101);
let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110);
let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111);
// Identity of A.
let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c);
assert_eq_m512i(r, a);
// Bitwise xor.
let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c);
let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100);
assert_eq_m512i(r, e);
assert_eq_m512i(r, simd_xor(simd_xor(a, b), c));
// Majority (2 or more bits set).
let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c);
let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111);
assert_eq_m512i(r, e);
}
@ -46274,11 +46257,27 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_ternarylogic_epi32() {
let a = _mm256_set1_epi32(1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let c = _mm256_set1_epi32(1 << 0);
let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
let e = _mm256_set1_epi32(0);
use core::intrinsics::simd::simd_xor;
let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d);
let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101);
let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110);
let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111);
// Identity of A.
let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c);
assert_eq_m256i(r, a);
// Bitwise xor.
let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c);
let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100);
assert_eq_m256i(r, e);
assert_eq_m256i(r, simd_xor(simd_xor(a, b), c));
// Majority (2 or more bits set).
let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c);
let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111);
assert_eq_m256i(r, e);
}
@ -46308,11 +46307,25 @@ mod tests {
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_ternarylogic_epi32() {
let a = _mm_set1_epi32(1 << 2);
let b = _mm_set1_epi32(1 << 1);
let c = _mm_set1_epi32(1 << 0);
let r = _mm_ternarylogic_epi32::<8>(a, b, c);
let e = _mm_set1_epi32(0);
use core::intrinsics::simd::simd_xor;
let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101);
let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110);
let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111);
// Identity of A.
let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c);
assert_eq_m128i(r, a);
// Bitwise xor.
let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c);
let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100);
assert_eq_m128i(r, e);
assert_eq_m128i(r, simd_xor(simd_xor(a, b), c));
// Majority (2 or more bits set).
let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c);
let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111);
assert_eq_m128i(r, e);
}
@ -56063,7 +56076,7 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "avx512dq")]
#[simd_test(enable = "avx512f")]
unsafe fn test_kortest_mask16_u8() {
let a: __mmask16 = 0b0110100101101001;
let b: __mmask16 = 0b1011011010110110;
@ -56073,7 +56086,7 @@ mod tests {
assert_eq!(all_ones, 1);
}
#[simd_test(enable = "avx512dq")]
#[simd_test(enable = "avx512f")]
unsafe fn test_kortestc_mask16_u8() {
let a: __mmask16 = 0b0110100101101001;
let b: __mmask16 = 0b1011011010110110;
@ -56081,7 +56094,7 @@ mod tests {
assert_eq!(r, 1);
}
#[simd_test(enable = "avx512dq")]
#[simd_test(enable = "avx512f")]
unsafe fn test_kortestz_mask16_u8() {
let a: __mmask16 = 0b0110100101101001;
let b: __mmask16 = 0b1011011010110110;
@ -56089,7 +56102,7 @@ mod tests {
assert_eq!(r, 0);
}
#[simd_test(enable = "avx512dq")]
#[simd_test(enable = "avx512f")]
unsafe fn test_kshiftli_mask16() {
let a: __mmask16 = 0b1001011011000011;
let r = _kshiftli_mask16::<3>(a);
@ -56109,7 +56122,7 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "avx512dq")]
#[simd_test(enable = "avx512f")]
unsafe fn test_kshiftri_mask16() {
let a: __mmask16 = 0b1010100100111100;
let r = _kshiftri_mask16::<3>(a);
@ -57370,7 +57383,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm256_maskz_set1_epi32(0, a);
@ -57391,7 +57404,7 @@ mod tests {
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm_maskz_set1_epi32(0, a);

View file

@ -20766,7 +20766,7 @@ mod tests {
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_mask_fmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@ -20783,7 +20783,7 @@ mod tests {
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_mask3_fmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@ -20800,7 +20800,7 @@ mod tests {
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm_maskz_fmsub_round_sh() {
let a = _mm_setr_ph(1.0, 10., 11., 12., 13., 14., 15., 16.);
let b = _mm_setr_ph(2.0, 20., 21., 22., 23., 24., 25., 26.);
@ -24529,7 +24529,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvtepi32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm256_set_ph(
@ -24542,7 +24542,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvtepi32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_cvtepi32_ph(0b0101010101010101, a);
@ -24552,7 +24552,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_cvt_roundepi32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
@ -24562,7 +24562,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvt_roundepi32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm256_set_ph(
@ -24579,7 +24579,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvt_roundepi32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_cvt_roundepi32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@ -24658,7 +24658,7 @@ mod tests {
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_cvtepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_cvtepu32_ph(a);
@ -24668,7 +24668,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvtepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm256_set_ph(
@ -24681,7 +24681,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvtepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_cvtepu32_ph(0b0101010101010101, a);
@ -24691,7 +24691,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_cvt_roundepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
@ -24701,7 +24701,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvt_roundepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm256_set_ph(
@ -24719,7 +24719,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvt_roundepu32_ph() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_cvt_roundepu32_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
@ -25006,7 +25006,7 @@ mod tests {
assert_eq_m128h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_cvtxps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@ -25018,7 +25018,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvtxps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@ -25033,7 +25033,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvtxps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@ -25045,7 +25045,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_cvtx_roundps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@ -25057,7 +25057,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_mask_cvtx_roundps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
@ -25077,7 +25077,7 @@ mod tests {
assert_eq_m256h(r, e);
}
#[simd_test(enable = "avx512fp16")]
#[simd_test(enable = "avx512fp16,avx512vl")]
unsafe fn test_mm512_maskz_cvtx_roundps_ph() {
let a = _mm512_set_ps(
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,

View file

@ -64,16 +64,16 @@ mod tests {
use crate::core_arch::x86::*;
use stdarch_test::simd_test;
#[simd_test(enable = "sse2")]
unsafe fn test_rdtsc() {
let r = _rdtsc();
#[test]
fn test_rdtsc() {
let r = unsafe { _rdtsc() };
assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
}
#[simd_test(enable = "sse2")]
unsafe fn test_rdtscp() {
#[test]
fn test_rdtscp() {
let mut aux = 0;
let r = __rdtscp(&mut aux);
let r = unsafe { __rdtscp(&mut aux) };
assert_ne!(r, 0); // The chances of this being 0 are infinitesimal
}
}

View file

@ -3052,8 +3052,9 @@ mod tests {
assert_eq_m128(r, _mm_set1_ps(0.0));
}
#[simd_test(enable = "sse")]
unsafe fn test_MM_SHUFFLE() {
#[test]
#[allow(non_snake_case)]
fn test_MM_SHUFFLE() {
assert_eq!(_MM_SHUFFLE(0, 1, 1, 3), 0b00_01_01_11);
assert_eq!(_MM_SHUFFLE(3, 1, 1, 0), 0b11_01_01_00);
assert_eq!(_MM_SHUFFLE(1, 2, 2, 1), 0b01_10_10_01);

View file

@ -1,3 +1,5 @@
use crate::core_arch::{simd::*, x86::*};
#[cfg(test)]
use stdarch_test::assert_instr;
@ -242,6 +244,206 @@ pub unsafe fn _tile_cmmrlfp16ps<const DST: i32, const A: i32, const B: i32>() {
tcmmrlfp16ps(DST as i8, A as i8, B as i8);
}
/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and BF8 (8-bit E5M2)
/// floating-point elements in tile b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
/// back to tile dst.
///
/// The const parameters `DST`, `A` and `B` select tile registers; each must fit
/// in 3 bits (i.e. be in `0..=7`), enforced at compile time below.
///
/// # Safety
///
/// Requires the `amx-fp8` target feature. The caller is also responsible for any
/// prior AMX tile configuration this instruction depends on (presumably via
/// `_tile_loadconfig`, as with the other tile intrinsics in this module — confirm).
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(tdpbf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbf8ps<const DST: i32, const A: i32, const B: i32>() {
    // Tile register indices are 3-bit; reject out-of-range values at compile time.
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    // Forward to the LLVM intrinsic; the const indices are narrowed to i8.
    tdpbf8ps(DST as i8, A as i8, B as i8);
}
/// Compute dot-product of BF8 (8-bit E5M2) floating-point elements in tile a and HF8
/// (8-bit E4M3) floating-point elements in tile b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
/// back to tile dst.
///
/// The const parameters `DST`, `A` and `B` select tile registers; each must fit
/// in 3 bits (i.e. be in `0..=7`), enforced at compile time below.
///
/// # Safety
///
/// Requires the `amx-fp8` target feature. The caller is also responsible for any
/// prior AMX tile configuration this instruction depends on (presumably via
/// `_tile_loadconfig`, as with the other tile intrinsics in this module — confirm).
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(tdpbhf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dpbhf8ps<const DST: i32, const A: i32, const B: i32>() {
    // Tile register indices are 3-bit; reject out-of-range values at compile time.
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    // Forward to the LLVM intrinsic; the const indices are narrowed to i8.
    tdpbhf8ps(DST as i8, A as i8, B as i8);
}
/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and BF8
/// (8-bit E5M2) floating-point elements in tile b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
/// back to tile dst.
///
/// The const parameters `DST`, `A` and `B` select tile registers; each must fit
/// in 3 bits (i.e. be in `0..=7`), enforced at compile time below.
///
/// # Safety
///
/// Requires the `amx-fp8` target feature. The caller is also responsible for any
/// prior AMX tile configuration this instruction depends on (presumably via
/// `_tile_loadconfig`, as with the other tile intrinsics in this module — confirm).
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
    all(test, any(target_os = "linux", target_env = "msvc")),
    assert_instr(tdphbf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dphbf8ps<const DST: i32, const A: i32, const B: i32>() {
    // Tile register indices are 3-bit; reject out-of-range values at compile time.
    static_assert_uimm_bits!(DST, 3);
    static_assert_uimm_bits!(A, 3);
    static_assert_uimm_bits!(B, 3);
    // Forward to the LLVM intrinsic; the const indices are narrowed to i8.
    tdphbf8ps(DST as i8, A as i8, B as i8);
}
/// Compute dot-product of HF8 (8-bit E4M3) floating-point elements in tile a and HF8 (8-bit E4M3)
/// floating-point elements in tile b, accumulating the intermediate single-precision
/// (32-bit) floating-point elements with elements in dst, and store the 32-bit result
/// back to tile dst.
///
/// `DST`, `A` and `B` select AMX tile registers and must each fit in 3 bits
/// (`0..=7`); this is enforced at compile time below.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-fp8")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tdphf8ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_dphf8ps<const DST: i32, const A: i32, const B: i32>() {
// Tile register indices are 3-bit; out-of-range values are rejected at compile time.
static_assert_uimm_bits!(DST, 3);
static_assert_uimm_bits!(A, 3);
static_assert_uimm_bits!(B, 3);
tdphf8ps(DST as i8, A as i8, B as i8);
}
/// Load tile rows from memory specified by base address and stride into destination tile dst
/// using the tile configuration previously configured via _tile_loadconfig.
/// Additionally, this intrinsic indicates the source memory location is likely to become
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
/// before it is written, assuming it is ever written in the future.
///
/// `DST` selects a tile register (`0..=7`, checked at compile time).
/// `stride` is the byte distance between consecutive tile rows at `base`.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tileloaddrs, DST = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
// Tile register index is 3-bit; out-of-range values are rejected at compile time.
static_assert_uimm_bits!(DST, 3);
tileloaddrs64(DST as i8, base, stride);
}
/// Load tile rows from memory specified by base address and stride into destination tile dst
/// using the tile configuration previously configured via _tile_loadconfig.
/// Provides a hint to the implementation that the data would be reused but does not need
/// to be resident in the nearest cache levels.
/// Additionally, this intrinsic indicates the source memory location is likely to become
/// read-shared by multiple processors, i.e., read in the future by at least one other processor
/// before it is written, assuming it is ever written in the future.
///
/// `DST` selects a tile register (`0..=7`, checked at compile time).
/// `stride` is the byte distance between consecutive tile rows at `base`.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-movrs")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tileloaddrst1, DST = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_stream_loaddrs<const DST: i32>(base: *const u8, stride: usize) {
// Tile register index is 3-bit; out-of-range values are rejected at compile time.
static_assert_uimm_bits!(DST, 3);
tileloaddrst164(DST as i8, base, stride);
}
/// Perform matrix multiplication of two tiles a and b, containing packed single precision (32-bit)
/// floating-point elements, which are converted to TF32 (tensor-float32) format, and accumulate the
/// results into a packed single precision tile.
/// For each possible combination of (row of a, column of b), it performs
/// - convert to TF32
/// - multiply the corresponding elements of a and b
/// - accumulate the results into the corresponding row and column of dst using round-to-nearest-even
/// rounding mode.
/// Output FP32 denormals are always flushed to zero, input single precision denormals are always
/// handled and *not* treated as zero.
///
/// `DST`, `A` and `B` select AMX tile registers and must each fit in 3 bits
/// (`0..=7`); this is enforced at compile time below.
#[inline]
#[rustc_legacy_const_generics(0, 1, 2)]
#[target_feature(enable = "amx-tf32")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tmmultf32ps, DST = 0, A = 1, B = 2)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_mmultf32ps<const DST: i32, const A: i32, const B: i32>() {
// Tile register indices are 3-bit; out-of-range values are rejected at compile time.
static_assert_uimm_bits!(DST, 3);
static_assert_uimm_bits!(A, 3);
static_assert_uimm_bits!(B, 3);
tmmultf32ps(DST as i8, A as i8, B as i8);
}
/// Moves a row from a tile register to a zmm register, converting the packed 32-bit signed integer
/// elements to packed single-precision (32-bit) floating-point elements.
///
/// `TILE` selects a tile register (`0..=7`, checked at compile time).
/// `row` selects which tile row to read; assumed to be below the configured
/// row count — TODO confirm hardware behavior for out-of-range rows.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tcvtrowd2ps, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowd2ps<const TILE: i32>(row: u32) -> __m512 {
static_assert_uimm_bits!(TILE, 3);
// The intrinsic returns an f32x16; repackage it as the public __m512 type.
tcvtrowd2ps(TILE as i8, row).as_m512()
}
/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
/// 16-bit elements are placed in the high 16-bits within each 32-bit element of the returned vector.
///
/// `TILE` selects a tile register (`0..=7`, checked at compile time).
/// `row` selects which tile row to read; assumed to be below the configured
/// row count — TODO confirm hardware behavior for out-of-range rows.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tcvtrowps2phh, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phh<const TILE: i32>(row: u32) -> __m512h {
static_assert_uimm_bits!(TILE, 3);
// The intrinsic returns an f16x32; repackage it as the public __m512h type.
tcvtrowps2phh(TILE as i8, row).as_m512h()
}
/// Moves a row from a tile register to a zmm register, converting the packed single-precision (32-bit)
/// floating-point elements to packed half-precision (16-bit) floating-point elements. The resulting
/// 16-bit elements are placed in the low 16-bits within each 32-bit element of the returned vector.
///
/// `TILE` selects a tile register (`0..=7`, checked at compile time).
/// `row` selects which tile row to read; assumed to be below the configured
/// row count — TODO confirm hardware behavior for out-of-range rows.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tcvtrowps2phl, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_cvtrowps2phl<const TILE: i32>(row: u32) -> __m512h {
static_assert_uimm_bits!(TILE, 3);
// The intrinsic returns an f16x32; repackage it as the public __m512h type.
tcvtrowps2phl(TILE as i8, row).as_m512h()
}
/// Moves one row of tile data into a zmm vector register
///
/// `TILE` selects a tile register (`0..=7`, checked at compile time).
/// `row` selects which tile row to read; assumed to be below the configured
/// row count — TODO confirm hardware behavior for out-of-range rows.
#[inline]
#[rustc_legacy_const_generics(0)]
#[target_feature(enable = "amx-avx512,avx10.2")]
#[cfg_attr(
all(test, any(target_os = "linux", target_env = "msvc")),
assert_instr(tilemovrow, TILE = 0)
)]
#[unstable(feature = "x86_amx_intrinsics", issue = "126622")]
pub unsafe fn _tile_movrow<const TILE: i32>(row: u32) -> __m512i {
static_assert_uimm_bits!(TILE, 3);
// The intrinsic returns an i32x16; repackage it as the public __m512i type.
tilemovrow(TILE as i8, row).as_m512i()
}
#[allow(improper_ctypes)]
unsafe extern "C" {
#[link_name = "llvm.x86.ldtilecfg"]
@ -274,13 +476,35 @@ unsafe extern "C" {
fn tcmmimfp16ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tcmmrlfp16ps"]
fn tcmmrlfp16ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tdpbf8ps"]
fn tdpbf8ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tdpbhf8ps"]
fn tdpbhf8ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tdphbf8ps"]
fn tdphbf8ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tdphf8ps"]
fn tdphf8ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tileloaddrs64"]
fn tileloaddrs64(dst: i8, base: *const u8, stride: usize);
#[link_name = "llvm.x86.tileloaddrst164"]
fn tileloaddrst164(dst: i8, base: *const u8, stride: usize);
#[link_name = "llvm.x86.tmmultf32ps"]
fn tmmultf32ps(dst: i8, a: i8, b: i8);
#[link_name = "llvm.x86.tcvtrowd2ps"]
fn tcvtrowd2ps(tile: i8, row: u32) -> f32x16;
#[link_name = "llvm.x86.tcvtrowps2phh"]
fn tcvtrowps2phh(tile: i8, row: u32) -> f16x32;
#[link_name = "llvm.x86.tcvtrowps2phl"]
fn tcvtrowps2phl(tile: i8, row: u32) -> f16x32;
#[link_name = "llvm.x86.tilemovrow"]
fn tilemovrow(tile: i8, row: u32) -> i32x16;
}
#[cfg(test)]
mod tests {
use crate::core_arch::x86::_mm_cvtness_sbh;
use crate::core_arch::x86_64::*;
use core::mem::transmute;
use core::{array, mem::transmute};
use stdarch_test::simd_test;
#[cfg(target_os = "linux")]
use syscalls::{Sysno, syscall};
@ -619,4 +843,230 @@ mod tests {
_tile_release();
assert_eq!(res, [[0f32; 16]; 16]);
}
// FP8 encodings used by the dot-product tests below.
// BF8 is the E5M2 format (5 exponent bits, bias 15): 0x3c = 1.0, 0x40 = 2.0.
// HF8 is the E4M3 format (4 exponent bits, bias 7): 0x38 = 1.0, 0x40 = 2.0.
const BF8_ONE: u8 = 0x3c;
const BF8_TWO: u8 = 0x40;
const HF8_ONE: u8 = 0x38;
const HF8_TWO: u8 = 0x40;
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dpbf8ps() {
_init_amx();
// BF8 operands: tile 1 holds all 1.0, tile 2 holds all 2.0.
let ones = [BF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
// Configure tiles 0..=2: palette 1, 16 rows of 64 bytes (64 fp8 elements per row).
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
// Each f32 in dst accumulates 64 products of 1.0 * 2.0 = 128.0.
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dpbhf8ps() {
_init_amx();
// Mixed operands: tile 1 holds BF8 1.0, tile 2 holds HF8 2.0.
let ones = [BF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
// Configure tiles 0..=2: palette 1, 16 rows of 64 bytes (64 fp8 elements per row).
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dpbhf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
// Each f32 in dst accumulates 64 products of 1.0 * 2.0 = 128.0.
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dphbf8ps() {
_init_amx();
// Mixed operands: tile 1 holds HF8 1.0, tile 2 holds BF8 2.0.
let ones = [HF8_ONE; 1024];
let twos = [BF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
// Configure tiles 0..=2: palette 1, 16 rows of 64 bytes (64 fp8 elements per row).
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphbf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
// Each f32 in dst accumulates 64 products of 1.0 * 2.0 = 128.0.
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
#[simd_test(enable = "amx-fp8")]
unsafe fn test_tile_dphf8ps() {
_init_amx();
// HF8 operands: tile 1 holds all 1.0, tile 2 holds all 2.0.
let ones = [HF8_ONE; 1024];
let twos = [HF8_TWO; 1024];
let mut res = [[0.0_f32; 16]; 16];
// Configure tiles 0..=2: palette 1, 16 rows of 64 bytes (64 fp8 elements per row).
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(&ones as *const u8, 64);
_tile_loadd::<2>(&twos as *const u8, 64);
_tile_dphf8ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
// Each f32 in dst accumulates 64 products of 1.0 * 2.0 = 128.0.
assert_eq!(res, [[128.0_f32; 16]; 16]);
}
#[simd_test(enable = "amx-movrs")]
unsafe fn test_tile_loaddrs() {
_init_amx();
// Configure tile 0 as 16 rows of 64 bytes.
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
// Load 16x64 ones via the read-shared-hint variant, then check they round-trip
// through a plain store.
let mat = [1_i8; 1024];
_tile_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
#[simd_test(enable = "amx-movrs")]
unsafe fn test_tile_stream_loaddrs() {
_init_amx();
// Configure tile 0 as 16 rows of 64 bytes.
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
// Load 16x64 ones via the streaming read-shared-hint variant, then check they
// round-trip through a plain store.
let mat = [1_i8; 1024];
_tile_stream_loaddrs::<0>(&mat as *const i8 as *const u8, 64);
let mut out = [[0_i8; 64]; 16];
_tile_stored::<0>(&mut out as *mut [i8; 64] as *mut u8, 64);
_tile_release();
assert_eq!(out, [[1; 64]; 16]);
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_movrow() {
_init_amx();
// Row i of the source matrix is filled with the byte value i.
let array: [[u8; 64]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
// Each extracted row must reproduce the 64 bytes loaded for that row.
// NOTE(review): unlike the dot-product tests, this test never calls
// `_tile_release()` — confirm leaving tile state configured is intentional.
for i in 0..16 {
let row = _tile_movrow::<0>(i);
assert_eq!(*row.as_u8x64().as_array(), [i as _; _]);
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowd2ps() {
_init_amx();
// Row i holds sixteen u32 values equal to i; converting row i should yield
// sixteen f32 values equal to i.
let array: [[u32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
// NOTE(review): no `_tile_release()` here — confirm leaving tile state
// configured is intentional.
for i in 0..16 {
let row = _tile_cvtrowd2ps::<0>(i);
assert_eq!(*row.as_f32x16().as_array(), [i as _; _]);
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowps2phh() {
_init_amx();
// Row i holds sixteen f32 values equal to i.
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phh::<0>(i);
// Converted f16 values land in the HIGH half of each 32-bit lane, i.e. the
// odd-indexed f16 elements; the even-indexed (low) halves stay zero.
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { 0.0 } else { i as _ })
);
}
}
#[simd_test(enable = "amx-avx512,avx10.2")]
unsafe fn test_tile_cvtrowps2phl() {
_init_amx();
// Row i holds sixteen f32 values equal to i.
let array: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let mut config = __tilecfg::default();
config.palette = 1;
config.colsb[0] = 64;
config.rows[0] = 16;
_tile_loadconfig(config.as_ptr());
_tile_loadd::<0>(array.as_ptr().cast(), 64);
for i in 0..16 {
let row = _tile_cvtrowps2phl::<0>(i);
// Converted f16 values land in the LOW half of each 32-bit lane, i.e. the
// even-indexed f16 elements; the odd-indexed (high) halves stay zero.
assert_eq!(
*row.as_f16x32().as_array(),
array::from_fn(|j| if j & 1 == 0 { i as _ } else { 0.0 })
);
}
}
#[simd_test(enable = "amx-tf32")]
unsafe fn test_tile_mmultf32ps() {
_init_amx();
// a[i][k] = i for all k; b[k][j] = j for all k.
let a: [[f32; 16]; 16] = array::from_fn(|i| [i as _; _]);
let b: [[f32; 16]; 16] = [array::from_fn(|j| j as _); _];
let mut res = [[0.0; 16]; 16];
// Configure tiles 0..=2: palette 1, 16 rows of 64 bytes (16 f32 per row).
let mut config = __tilecfg::default();
config.palette = 1;
(0..=2).for_each(|i| {
config.colsb[i] = 64;
config.rows[i] = 16;
});
_tile_loadconfig(config.as_ptr());
_tile_zero::<0>();
_tile_loadd::<1>(a.as_ptr().cast(), 64);
_tile_loadd::<2>(b.as_ptr().cast(), 64);
_tile_mmultf32ps::<0, 1, 2>();
_tile_stored::<0>(res.as_mut_ptr().cast(), 64);
_tile_release();
// res[i][j] = sum over k of a[i][k] * b[k][j] = 16 * i * j; these small integer
// products are exactly representable in TF32, so an exact comparison is valid.
let expected = array::from_fn(|i| array::from_fn(|j| 16.0 * i as f32 * j as f32));
assert_eq!(res, expected);
}
}

View file

@ -6453,6 +6453,7 @@ mod tests {
assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi64() {
let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, _mm512_set1_epi64(2));
@ -6464,6 +6465,7 @@ mod tests {
assert_eq_m512d(expected, _mm512_set1_pd(2.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_epi64() {
let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1));
@ -6475,6 +6477,7 @@ mod tests {
assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_epi64() {
let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4));
@ -7335,6 +7338,7 @@ mod tests {
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_epi64_mask() {
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
@ -9685,7 +9689,7 @@ mod tests {
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutex_epi64() {
let a = _mm256_set_epi64x(3, 2, 1, 0);
let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a);

View file

@ -48,8 +48,12 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
.expect("Error parsing input file");
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
intrinsics.dedup();
let mut intrinsics = intrinsics
let sample_percentage: usize = cli_options.sample_percentage as usize;
let sample_size = (intrinsics.len() * sample_percentage) / 100;
let intrinsics = intrinsics
.into_iter()
// Not sure how we would compare intrinsic that returns void.
.filter(|i| i.results.kind() != TypeKind::Void)
@ -61,8 +65,8 @@ impl SupportedArchitectureTest for ArmArchitectureTest {
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
.filter(|i| !cli_options.skip.contains(&i.name))
.filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()]))
.take(sample_size)
.collect::<Vec<_>>();
intrinsics.dedup();
Self {
intrinsics,

View file

@ -86,6 +86,10 @@ pub fn compare_outputs(intrinsic_name_list: &Vec<String>, runner: &str, target:
println!("Failed to run rust program for intrinsic {intrinsic}")
}
});
println!("{} differences found", intrinsics.len());
println!(
"{} differences found (tested {} intrinsics)",
intrinsics.len(),
intrinsic_name_list.len()
);
intrinsics.is_empty()
}

View file

@ -79,12 +79,16 @@ pub trait SupportedArchitectureTest {
trace!("compiling mod_{i}.cpp");
if let Some(cpp_compiler) = cpp_compiler_wrapped.as_ref() {
let compile_output = cpp_compiler
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"));
.compile_object_file(&format!("mod_{i}.cpp"), &format!("mod_{i}.o"))
.map_err(|e| format!("Error compiling mod_{i}.cpp: {e:?}"))?;
assert!(
compile_output.status.success(),
"{}",
String::from_utf8_lossy(&compile_output.stderr)
);
trace!("finished compiling mod_{i}.cpp");
if let Err(compile_error) = compile_output {
return Err(format!("Error compiling mod_{i}.cpp: {compile_error:?}"));
}
}
Ok(())
})

View file

@ -34,7 +34,7 @@ fn run(test_environment: impl SupportedArchitectureTest) {
if !test_environment.build_rust_file() {
std::process::exit(3);
}
info!("comaparing outputs");
info!("comparing outputs");
if !test_environment.compare_outputs() {
std::process::exit(1);
}

View file

@ -11,7 +11,6 @@ use crate::common::compile_c::CppCompilation;
use crate::common::intrinsic::Intrinsic;
use crate::common::intrinsic_helpers::TypeKind;
use intrinsic::X86IntrinsicType;
use itertools::Itertools;
use xml_parser::get_xml_intrinsics;
pub struct X86ArchitectureTest {
@ -44,12 +43,16 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS;
fn create(cli_options: ProcessedCli) -> Self {
let intrinsics =
let mut intrinsics =
get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file");
let sample_percentage: usize = cli_options.sample_percentage as usize;
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
intrinsics.dedup_by(|a, b| a.name == b.name);
let mut intrinsics = intrinsics
let sample_percentage: usize = cli_options.sample_percentage as usize;
let sample_size = (intrinsics.len() * sample_percentage) / 100;
let intrinsics = intrinsics
.into_iter()
// Not sure how we would compare intrinsic that returns void.
.filter(|i| i.results.kind() != TypeKind::Void)
@ -61,13 +64,9 @@ impl SupportedArchitectureTest for X86ArchitectureTest {
.filter(|i| !i.arguments.iter().any(|a| a.is_ptr()))
.filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128))
.filter(|i| !cli_options.skip.contains(&i.name))
.unique_by(|i| i.name.clone())
.take(sample_size)
.collect::<Vec<_>>();
let sample_size = (intrinsics.len() * sample_percentage) / 100;
intrinsics.truncate(sample_size);
intrinsics.sort_by(|a, b| a.name.cmp(&b.name));
Self {
intrinsics: intrinsics,
cli_options: cli_options,

View file

@ -78,7 +78,7 @@ pub(crate) fn disassemble_myself() -> HashSet<Function> {
let objdump = env::var("OBJDUMP").unwrap_or_else(|_| "objdump".to_string());
let add_args = if cfg!(target_vendor = "apple") && cfg!(target_arch = "aarch64") {
// Target features need to be enabled for LLVM objdump on Darwin ARM64
vec!["--mattr=+v8.6a,+crypto,+tme"]
vec!["--mattr=+v8.6a,+crypto"]
} else if cfg!(any(target_arch = "riscv32", target_arch = "riscv64")) {
vec!["--mattr=+zk,+zks,+zbc,+zbb"]
} else {

View file

@ -444,7 +444,6 @@ fn verify_all_signatures() {
&& !rust.file.ends_with("v6.rs\"")
&& !rust.file.ends_with("v7.rs\"")
&& !rust.file.ends_with("v8.rs\"")
&& !rust.file.ends_with("tme.rs\"")
&& !rust.file.ends_with("mte.rs\"")
&& !rust.file.ends_with("ex.rs\"")
&& !skip_intrinsic_verify.contains(&rust.name)

View file

@ -304,6 +304,14 @@ fn verify_all_signatures() {
if feature.contains("sse4a") || feature.contains("tbm") {
continue;
}
// FIXME: these have not been added to Intrinsics Guide yet
if ["amx-avx512", "amx-fp8", "amx-movrs", "amx-tf32"]
.iter()
.any(|f| feature.contains(f))
{
continue;
}
}
let intel = match map.remove(rust.name) {

View file

@ -1 +1 @@
73e6c9ebd9123154a196300ef58e30ec8928e74e
8401398e1f14a24670ee1a3203713dc2f0f8b3a8